Webrev diff view — rev 1025: imported patch indy.compiler.patch
(page navigation chrome: Print / Split / Close / Expand all / Collapse all)
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 20 // CA 95054 USA or visit www.sun.com if you need additional information or
21 21 // have any questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // archtecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok so here's the trick FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instructions out of Intels FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source %{
// Relocation format shorthands for 32-bit immediates and displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional MacroAssembler shorthand used inside encodings.
#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// Long pairs are laid out two OptoReg slots apart.
#define HIGH_FROM_LOW(x) ((x)+2)
246 246
247 247 // These masks are used to provide 128-bit aligned bitmasks to the XMM
248 248 // instructions, to allow sign-masking or sign-bit flipping. They allow
249 249 // fast versions of NegF/NegD and AbsF/AbsD.
250 250
251 251 // Note: 'double' and 'long long' have 32-bits alignment on x86.
252 252 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
253 253 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
254 254 // of 128-bits operands for SSE instructions.
255 255 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
256 256 // Store the value to a 128-bits operand.
257 257 operand[0] = lo;
258 258 operand[1] = hi;
259 259 return operand;
260 260 }
[... 260 lines elided in this webrev view ...]
261 261
262 262 // Buffer for 128-bits masks used by SSE instructions.
263 263 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
264 264
265 265 // Static initialization during VM startup.
266 266 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
267 267 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
268 268 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
269 269 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
270 270
271 +// Offset hacking within calls.
272 +static int pre_call_FPU_size() {
273 + if (Compile::current()->in_24_bit_fp_mode())
274 + return 6; // fldcw
275 + return 0;
276 +}
277 +
278 +static int preserve_SP_size() {
279 + return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
280 +}
281 +
271 282 // !!!!! Special hack to get all type of calls to specify the byte offset
272 283 // from the start of the call to the point where the return address
273 284 // will point.
274 285 int MachCallStaticJavaNode::ret_addr_offset() {
275 - return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 5 bytes from start of call to where return address points
286 + int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
287 + if (_method_handle_invoke)
288 + offset += preserve_SP_size();
289 + return offset;
276 290 }
277 291
278 292 int MachCallDynamicJavaNode::ret_addr_offset() {
279 - return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 10 bytes from start of call to where return address points
293 + return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
280 294 }
281 295
282 296 static int sizeof_FFree_Float_Stack_All = -1;
283 297
284 298 int MachCallRuntimeNode::ret_addr_offset() {
285 299 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
286 - return sizeof_FFree_Float_Stack_All + 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);
300 + return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
287 301 }
288 302
289 303 // Indicate if the safepoint node needs the polling page as an input.
290 304 // Since x86 does have absolute addressing, it doesn't.
291 305 bool SafePointNode::needs_polling_address_input() {
292 306 return false;
293 307 }
294 308
295 309 //
296 310 // Compute padding required for nodes which need alignment
297 311 //
298 312
299 313 // The address of the call instruction needs to be 4-byte aligned to
300 314 // ensure that it does not span a cache line so that it can be patched.
301 315 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
302 - if (Compile::current()->in_24_bit_fp_mode())
303 - current_offset += 6; // skip fldcw in pre_call_FPU, if any
316 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
317 + current_offset += 1; // skip call opcode byte
318 + return round_to(current_offset, alignment_required()) - current_offset;
319 +}
320 +
321 +// The address of the call instruction needs to be 4-byte aligned to
322 +// ensure that it does not span a cache line so that it can be patched.
323 +int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
324 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
325 + current_offset += preserve_SP_size(); // skip mov rbp, rsp
304 326 current_offset += 1; // skip call opcode byte
305 327 return round_to(current_offset, alignment_required()) - current_offset;
306 328 }
307 329
308 330 // The address of the call instruction needs to be 4-byte aligned to
309 331 // ensure that it does not span a cache line so that it can be patched.
310 332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
311 - if (Compile::current()->in_24_bit_fp_mode())
312 - current_offset += 6; // skip fldcw in pre_call_FPU, if any
333 + current_offset += pre_call_FPU_size(); // skip fldcw, if any
313 334 current_offset += 5; // skip MOV instruction
314 335 current_offset += 1; // skip call opcode byte
315 336 return round_to(current_offset, alignment_required()) - current_offset;
316 337 }
317 338
318 339 #ifndef PRODUCT
319 340 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
320 341 st->print("INT3");
321 342 }
322 343 #endif
323 344
324 345 // EMIT_RM()
325 346 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
326 347 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
327 348 *(cbuf.code_end()) = c;
328 349 cbuf.set_code_end(cbuf.code_end() + 1);
329 350 }
330 351
331 352 // EMIT_CC()
332 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
333 354 unsigned char c = (unsigned char)( f1 | f2 );
334 355 *(cbuf.code_end()) = c;
335 356 cbuf.set_code_end(cbuf.code_end() + 1);
336 357 }
337 358
338 359 // EMIT_OPCODE()
339 360 void emit_opcode(CodeBuffer &cbuf, int code) {
340 361 *(cbuf.code_end()) = (unsigned char)code;
341 362 cbuf.set_code_end(cbuf.code_end() + 1);
342 363 }
343 364
344 365 // EMIT_OPCODE() w/ relocation information
345 366 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
346 367 cbuf.relocate(cbuf.inst_mark() + offset, reloc);
347 368 emit_opcode(cbuf, code);
348 369 }
349 370
350 371 // EMIT_D8()
351 372 void emit_d8(CodeBuffer &cbuf, int d8) {
352 373 *(cbuf.code_end()) = (unsigned char)d8;
353 374 cbuf.set_code_end(cbuf.code_end() + 1);
354 375 }
355 376
356 377 // EMIT_D16()
357 378 void emit_d16(CodeBuffer &cbuf, int d16) {
358 379 *((short *)(cbuf.code_end())) = d16;
359 380 cbuf.set_code_end(cbuf.code_end() + 2);
360 381 }
361 382
362 383 // EMIT_D32()
363 384 void emit_d32(CodeBuffer &cbuf, int d32) {
364 385 *((int *)(cbuf.code_end())) = d32;
365 386 cbuf.set_code_end(cbuf.code_end() + 4);
366 387 }
367 388
368 389 // emit 32 bit value and construct relocation entry from relocInfo::relocType
369 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
370 391 int format) {
371 392 cbuf.relocate(cbuf.inst_mark(), reloc, format);
372 393
373 394 *((int *)(cbuf.code_end())) = d32;
374 395 cbuf.set_code_end(cbuf.code_end() + 4);
375 396 }
376 397
377 398 // emit 32 bit value and construct relocation entry from RelocationHolder
378 399 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
379 400 int format) {
380 401 #ifdef ASSERT
381 402 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
382 403 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
383 404 }
384 405 #endif
385 406 cbuf.relocate(cbuf.inst_mark(), rspec, format);
386 407
387 408 *((int *)(cbuf.code_end())) = d32;
388 409 cbuf.set_code_end(cbuf.code_end() + 4);
389 410 }
390 411
391 412 // Access stack slot for load or store
392 413 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
393 414 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
394 415 if( -128 <= disp && disp <= 127 ) {
395 416 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
396 417 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
397 418 emit_d8 (cbuf, disp); // Displacement // R/M byte
398 419 } else {
399 420 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
400 421 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
401 422 emit_d32(cbuf, disp); // Displacement // R/M byte
402 423 }
403 424 }
404 425
405 426 // eRegI ereg, memory mem) %{ // emit_reg_mem
406 427 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
407 428 // There is no index & no scale, use form without SIB byte
408 429 if ((index == 0x4) &&
409 430 (scale == 0) && (base != ESP_enc)) {
410 431 // If no displacement, mode is 0x0; unless base is [EBP]
411 432 if ( (displace == 0) && (base != EBP_enc) ) {
412 433 emit_rm(cbuf, 0x0, reg_encoding, base);
413 434 }
414 435 else { // If 8-bit displacement, mode 0x1
415 436 if ((displace >= -128) && (displace <= 127)
416 437 && !(displace_is_oop) ) {
417 438 emit_rm(cbuf, 0x1, reg_encoding, base);
418 439 emit_d8(cbuf, displace);
419 440 }
420 441 else { // If 32-bit displacement
421 442 if (base == -1) { // Special flag for absolute address
422 443 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
423 444 // (manual lies; no SIB needed here)
424 445 if ( displace_is_oop ) {
425 446 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
426 447 } else {
427 448 emit_d32 (cbuf, displace);
428 449 }
429 450 }
430 451 else { // Normal base + offset
431 452 emit_rm(cbuf, 0x2, reg_encoding, base);
432 453 if ( displace_is_oop ) {
433 454 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
434 455 } else {
435 456 emit_d32 (cbuf, displace);
436 457 }
437 458 }
438 459 }
439 460 }
440 461 }
441 462 else { // Else, encode with the SIB byte
442 463 // If no displacement, mode is 0x0; unless base is [EBP]
443 464 if (displace == 0 && (base != EBP_enc)) { // If no displacement
444 465 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
445 466 emit_rm(cbuf, scale, index, base);
446 467 }
447 468 else { // If 8-bit displacement, mode 0x1
448 469 if ((displace >= -128) && (displace <= 127)
449 470 && !(displace_is_oop) ) {
450 471 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
451 472 emit_rm(cbuf, scale, index, base);
452 473 emit_d8(cbuf, displace);
453 474 }
454 475 else { // If 32-bit displacement
455 476 if (base == 0x04 ) {
456 477 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
457 478 emit_rm(cbuf, scale, index, 0x04);
458 479 } else {
459 480 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
460 481 emit_rm(cbuf, scale, index, base);
461 482 }
462 483 if ( displace_is_oop ) {
463 484 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
464 485 } else {
465 486 emit_d32 (cbuf, displace);
466 487 }
467 488 }
468 489 }
469 490 }
470 491 }
471 492
472 493
473 494 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
474 495 if( dst_encoding == src_encoding ) {
475 496 // reg-reg copy, use an empty encoding
476 497 } else {
477 498 emit_opcode( cbuf, 0x8B );
478 499 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
479 500 }
480 501 }
481 502
482 503 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
483 504 if( dst_encoding == src_encoding ) {
484 505 // reg-reg copy, use an empty encoding
485 506 } else {
486 507 MacroAssembler _masm(&cbuf);
487 508
488 509 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
489 510 }
490 511 }
491 512
492 513
493 514 //=============================================================================
494 515 #ifndef PRODUCT
495 516 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
496 517 Compile* C = ra_->C;
497 518 if( C->in_24_bit_fp_mode() ) {
498 519 st->print("FLDCW 24 bit fpu control word");
499 520 st->print_cr(""); st->print("\t");
500 521 }
501 522
502 523 int framesize = C->frame_slots() << LogBytesPerInt;
503 524 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
504 525 // Remove two words for return addr and rbp,
505 526 framesize -= 2*wordSize;
506 527
507 528 // Calls to C2R adapters often do not accept exceptional returns.
508 529 // We require that their callers must bang for them. But be careful, because
509 530 // some VM calls (such as call site linkage) can use several kilobytes of
510 531 // stack. But the stack safety zone should account for that.
511 532 // See bugs 4446381, 4468289, 4497237.
512 533 if (C->need_stack_bang(framesize)) {
513 534 st->print_cr("# stack bang"); st->print("\t");
514 535 }
515 536 st->print_cr("PUSHL EBP"); st->print("\t");
516 537
517 538 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
518 539 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
519 540 st->print_cr(""); st->print("\t");
520 541 framesize -= wordSize;
521 542 }
522 543
523 544 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
524 545 if (framesize) {
525 546 st->print("SUB ESP,%d\t# Create frame",framesize);
526 547 }
527 548 } else {
528 549 st->print("SUB ESP,%d\t# Create frame",framesize);
529 550 }
530 551 }
531 552 #endif
532 553
533 554
534 555 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
535 556 Compile* C = ra_->C;
536 557
537 558 if (UseSSE >= 2 && VerifyFPU) {
538 559 MacroAssembler masm(&cbuf);
539 560 masm.verify_FPU(0, "FPU stack must be clean on entry");
540 561 }
541 562
542 563 // WARNING: Initial instruction MUST be 5 bytes or longer so that
543 564 // NativeJump::patch_verified_entry will be able to patch out the entry
544 565 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
545 566 // depth is ok at 5 bytes, the frame allocation can be either 3 or
546 567 // 6 bytes. So if we don't do the fldcw or the push then we must
547 568 // use the 6 byte frame allocation even if we have no frame. :-(
548 569 // If method sets FPU control word do it now
549 570 if( C->in_24_bit_fp_mode() ) {
550 571 MacroAssembler masm(&cbuf);
551 572 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
552 573 }
553 574
554 575 int framesize = C->frame_slots() << LogBytesPerInt;
555 576 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
556 577 // Remove two words for return addr and rbp,
557 578 framesize -= 2*wordSize;
558 579
559 580 // Calls to C2R adapters often do not accept exceptional returns.
560 581 // We require that their callers must bang for them. But be careful, because
561 582 // some VM calls (such as call site linkage) can use several kilobytes of
562 583 // stack. But the stack safety zone should account for that.
563 584 // See bugs 4446381, 4468289, 4497237.
564 585 if (C->need_stack_bang(framesize)) {
565 586 MacroAssembler masm(&cbuf);
566 587 masm.generate_stack_overflow_check(framesize);
567 588 }
568 589
569 590 // We always push rbp, so that on return to interpreter rbp, will be
570 591 // restored correctly and we can correct the stack.
571 592 emit_opcode(cbuf, 0x50 | EBP_enc);
572 593
573 594 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
574 595 emit_opcode(cbuf, 0x68); // push 0xbadb100d
575 596 emit_d32(cbuf, 0xbadb100d);
576 597 framesize -= wordSize;
577 598 }
578 599
579 600 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
580 601 if (framesize) {
581 602 emit_opcode(cbuf, 0x83); // sub SP,#framesize
582 603 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
583 604 emit_d8(cbuf, framesize);
584 605 }
585 606 } else {
586 607 emit_opcode(cbuf, 0x81); // sub SP,#framesize
587 608 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
588 609 emit_d32(cbuf, framesize);
589 610 }
590 611 C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
591 612
592 613 #ifdef ASSERT
593 614 if (VerifyStackAtCalls) {
594 615 Label L;
595 616 MacroAssembler masm(&cbuf);
596 617 masm.push(rax);
597 618 masm.mov(rax, rsp);
598 619 masm.andptr(rax, StackAlignmentInBytes-1);
599 620 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
600 621 masm.pop(rax);
601 622 masm.jcc(Assembler::equal, L);
602 623 masm.stop("Stack is not properly aligned!");
603 624 masm.bind(L);
604 625 }
605 626 #endif
606 627
607 628 }
608 629
609 630 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
610 631 return MachNode::size(ra_); // too many variables; just compute it the hard way
611 632 }
612 633
613 634 int MachPrologNode::reloc() const {
614 635 return 0; // a large enough number
615 636 }
616 637
617 638 //=============================================================================
618 639 #ifndef PRODUCT
619 640 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
620 641 Compile *C = ra_->C;
621 642 int framesize = C->frame_slots() << LogBytesPerInt;
622 643 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
623 644 // Remove two words for return addr and rbp,
624 645 framesize -= 2*wordSize;
625 646
626 647 if( C->in_24_bit_fp_mode() ) {
627 648 st->print("FLDCW standard control word");
628 649 st->cr(); st->print("\t");
629 650 }
630 651 if( framesize ) {
631 652 st->print("ADD ESP,%d\t# Destroy frame",framesize);
632 653 st->cr(); st->print("\t");
633 654 }
634 655 st->print_cr("POPL EBP"); st->print("\t");
635 656 if( do_polling() && C->is_method_compilation() ) {
636 657 st->print("TEST PollPage,EAX\t! Poll Safepoint");
637 658 st->cr(); st->print("\t");
638 659 }
639 660 }
640 661 #endif
641 662
642 663 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
643 664 Compile *C = ra_->C;
644 665
645 666 // If method set FPU control word, restore to standard control word
646 667 if( C->in_24_bit_fp_mode() ) {
647 668 MacroAssembler masm(&cbuf);
648 669 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
649 670 }
650 671
651 672 int framesize = C->frame_slots() << LogBytesPerInt;
652 673 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
653 674 // Remove two words for return addr and rbp,
654 675 framesize -= 2*wordSize;
655 676
656 677 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
657 678
658 679 if( framesize >= 128 ) {
659 680 emit_opcode(cbuf, 0x81); // add SP, #framesize
660 681 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
661 682 emit_d32(cbuf, framesize);
662 683 }
663 684 else if( framesize ) {
664 685 emit_opcode(cbuf, 0x83); // add SP, #framesize
665 686 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
666 687 emit_d8(cbuf, framesize);
667 688 }
668 689
669 690 emit_opcode(cbuf, 0x58 | EBP_enc);
670 691
671 692 if( do_polling() && C->is_method_compilation() ) {
672 693 cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
673 694 emit_opcode(cbuf,0x85);
674 695 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
675 696 emit_d32(cbuf, (intptr_t)os::get_polling_page());
676 697 }
677 698 }
678 699
// Exact byte size of the epilog; must agree instruction-for-instruction with
// MachEpilogNode::emit above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;   // FLDCW [mem32] = 6 bytes
  if( do_polling() && C->is_method_compilation() ) size += 6; // TEST EAX,[disp32] = 6 bytes

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6;                   // ADD ESP,imm32
  } else {
    size += framesize ? 3 : 0;   // ADD ESP,imm8, or nothing for an empty frame
  }
  return size;
}
699 720
// Conservative count of relocation entries needed by the epilog.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
703 724
// The epilog uses the generic (default) pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
707 728
708 729 int MachEpilogNode::safepoint_offset() const { return 0; }
709 730
710 731 //=============================================================================
711 732
712 733 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
713 734 static enum RC rc_class( OptoReg::Name reg ) {
714 735
715 736 if( !OptoReg::is_valid(reg) ) return rc_bad;
716 737 if (OptoReg::is_stack(reg)) return rc_stack;
717 738
718 739 VMReg r = OptoReg::as_VMReg(reg);
719 740 if (r->is_Register()) return rc_int;
720 741 if (r->is_FloatRegister()) {
721 742 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
722 743 return rc_float;
723 744 }
724 745 assert(r->is_XMMRegister(), "must be");
725 746 return rc_xmm;
726 747 }
727 748
// Emit, size, or pretty-print one reg<->[ESP+offset] instruction.
// With cbuf != NULL the bytes are emitted; with cbuf == NULL and !do_size a
// disassembly line is printed to 'st'.  Always returns the accumulated byte
// size ('size' plus this instruction's length).
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    // ESP-based address: ModRM with SIB byte, base = ESP, no index.
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModRM + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
747 768
748 769 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emits/sizes/prints one XMM<->[ESP+offset] move: MOVSS for a single slot,
// MOVSD for an adjacent pair (or MOVLPD on load when !UseXmmLoadAndClearUpper,
// which leaves the upper half of the destination untouched).
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );   // single slot: 'movss'
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0x0F + opcode + ModRM + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
786 807
787 808
// Emit/size/print an XMM-to-XMM register move.  With UseXmmRegToRegMoveAll the
// full-register MOVAPS/MOVAPD (0x0F 0x28) forms are used; otherwise the
// scalar MOVSS/MOVSD (prefix 0xF3/0xF2, opcode 0x10) forms.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 ); // 0x66 prefix selects the MOVAPD form
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size+4; // prefix + 0x0F + opcode + ModRM
  }
}
828 849
// Emit/size/print an integer register-to-register move (MOV dst,src).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + ModRM
}
841 862
// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0) it is first FLDed to the top of the stack and stored with a pop
// (FSTP); otherwise a plain FST leaves the FP stack unchanged.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" passed to impl_helper supplies the ModRM /digit field,
  // selecting store-and-pop (FSTP) vs. plain store (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
871 892
872 893 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
873 894 // Get registers to move
874 895 OptoReg::Name src_second = ra_->get_reg_second(in(1));
875 896 OptoReg::Name src_first = ra_->get_reg_first(in(1));
876 897 OptoReg::Name dst_second = ra_->get_reg_second(this );
877 898 OptoReg::Name dst_first = ra_->get_reg_first(this );
878 899
879 900 enum RC src_second_rc = rc_class(src_second);
880 901 enum RC src_first_rc = rc_class(src_first);
881 902 enum RC dst_second_rc = rc_class(dst_second);
882 903 enum RC dst_first_rc = rc_class(dst_first);
883 904
884 905 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
885 906
886 907 // Generate spill code!
887 908 int size = 0;
888 909
889 910 if( src_first == dst_first && src_second == dst_second )
890 911 return size; // Self copy, no move
891 912
892 913 // --------------------------------------
893 914 // Check for mem-mem move. push/pop to move.
894 915 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
895 916 if( src_second == dst_first ) { // overlapping stack copy ranges
896 917 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
897 918 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
898 919 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
899 920 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
900 921 }
901 922 // move low bits
902 923 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
903 924 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
904 925 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
905 926 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
906 927 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
907 928 }
908 929 return size;
909 930 }
910 931
911 932 // --------------------------------------
912 933 // Check for integer reg-reg copy
913 934 if( src_first_rc == rc_int && dst_first_rc == rc_int )
914 935 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
915 936
916 937 // Check for integer store
917 938 if( src_first_rc == rc_int && dst_first_rc == rc_stack )
918 939 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
919 940
920 941 // Check for integer load
921 942 if( dst_first_rc == rc_int && src_first_rc == rc_stack )
922 943 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
923 944
924 945 // --------------------------------------
925 946 // Check for float reg-reg copy
926 947 if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
927 948 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
928 949 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
929 950 if( cbuf ) {
930 951
931 952 // Note the mucking with the register encode to compensate for the 0/1
932 953 // indexing issue mentioned in a comment in the reg_def sections
933 954 // for FPR registers many lines above here.
934 955
935 956 if( src_first != FPR1L_num ) {
936 957 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
937 958 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
938 959 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
939 960 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
940 961 } else {
941 962 emit_opcode (*cbuf, 0xDD ); // FST ST(i)
942 963 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
943 964 }
944 965 #ifndef PRODUCT
945 966 } else if( !do_size ) {
946 967 if( size != 0 ) st->print("\n\t");
947 968 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
948 969 else st->print( "FST %s", Matcher::regName[dst_first]);
949 970 #endif
950 971 }
951 972 return size + ((src_first != FPR1L_num) ? 2+2 : 2);
952 973 }
953 974
954 975 // Check for float store
955 976 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
956 977 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
957 978 }
958 979
959 980 // Check for float load
960 981 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
961 982 int offset = ra_->reg2offset(src_first);
962 983 const char *op_str;
963 984 int op;
964 985 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
965 986 op_str = "FLD_D";
966 987 op = 0xDD;
967 988 } else { // 32-bit load
968 989 op_str = "FLD_S";
969 990 op = 0xD9;
970 991 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
971 992 }
972 993 if( cbuf ) {
973 994 emit_opcode (*cbuf, op );
974 995 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
975 996 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
976 997 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
977 998 #ifndef PRODUCT
978 999 } else if( !do_size ) {
979 1000 if( size != 0 ) st->print("\n\t");
980 1001 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
981 1002 #endif
982 1003 }
983 1004 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
984 1005 return size + 3+offset_size+2;
985 1006 }
986 1007
987 1008 // Check for xmm reg-reg copy
988 1009 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
989 1010 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
990 1011 (src_first+1 == src_second && dst_first+1 == dst_second),
991 1012 "no non-adjacent float-moves" );
992 1013 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
993 1014 }
994 1015
995 1016 // Check for xmm store
996 1017 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
997 1018 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
998 1019 }
999 1020
1000 1021 // Check for float xmm load
1001 1022 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1002 1023 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1003 1024 }
1004 1025
1005 1026 // Copy from float reg to xmm reg
1006 1027 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1007 1028 // copy to the top of stack from floating point reg
1008 1029 // and use LEA to preserve flags
1009 1030 if( cbuf ) {
1010 1031 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
1011 1032 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1012 1033 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1013 1034 emit_d8(*cbuf,0xF8);
1014 1035 #ifndef PRODUCT
1015 1036 } else if( !do_size ) {
1016 1037 if( size != 0 ) st->print("\n\t");
1017 1038 st->print("LEA ESP,[ESP-8]");
1018 1039 #endif
1019 1040 }
1020 1041 size += 4;
1021 1042
1022 1043 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1023 1044
1024 1045 // Copy from the temp memory to the xmm reg.
1025 1046 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1026 1047
1027 1048 if( cbuf ) {
1028 1049 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
1029 1050 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1030 1051 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1031 1052 emit_d8(*cbuf,0x08);
1032 1053 #ifndef PRODUCT
1033 1054 } else if( !do_size ) {
1034 1055 if( size != 0 ) st->print("\n\t");
1035 1056 st->print("LEA ESP,[ESP+8]");
1036 1057 #endif
1037 1058 }
1038 1059 size += 4;
1039 1060 return size;
1040 1061 }
1041 1062
1042 1063 assert( size > 0, "missed a case" );
1043 1064
1044 1065 // --------------------------------------------------------------------
1045 1066 // Check for second bits still needing moving.
1046 1067 if( src_second == dst_second )
1047 1068 return size; // Self copy; no move
1048 1069 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1049 1070
1050 1071 // Check for second word int-int move
1051 1072 if( src_second_rc == rc_int && dst_second_rc == rc_int )
1052 1073 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1053 1074
1054 1075 // Check for second word integer store
1055 1076 if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1056 1077 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1057 1078
1058 1079 // Check for second word integer load
1059 1080 if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1060 1081 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1061 1082
1062 1083
1063 1084 Unimplemented();
1064 1085 }
1065 1086
#ifndef PRODUCT
// Debug listing: run the shared implementation in print mode (no CodeBuffer).
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif
1071 1092
// Emit the spill copy via the shared implementation.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1075 1096
// Byte size of the spill copy: run the shared implementation in sizing mode.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1079 1100
1080 1101 //=============================================================================
#ifndef PRODUCT
// Debug listing of a padding nop sequence.
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif
1086 1107
// Emit '_count' bytes of nop padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1091 1112
// Size of the pad in bytes (one byte per nop).
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1095 1116
1096 1117
1097 1118 //=============================================================================
#ifndef PRODUCT
// Debug listing: a BoxLock materializes the address of its stack slot.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1105 1126
// Emit LEA reg,[ESP+offset]: materialize the address of the lock's stack slot.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    // 32-bit displacement form (7 bytes).
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    // 8-bit displacement form (4 bytes).
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}
1122 1143
1123 1144 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1124 1145 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1125 1146 if( offset >= 128 ) {
1126 1147 return 7;
1127 1148 }
1128 1149 else {
1129 1150 return 4;
1130 1151 }
1131 1152 }
1132 1153
1133 1154 //=============================================================================
1134 1155
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.  Its shape is:
  //   mov rbx,0   ; methodOop, zapped until fixup
  //   jmp -1      ; unresolved target

  address mark = cbuf.inst_mark(); // get mark within main instrs section

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore code_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1169 1190
1170 1191 //=============================================================================
#ifndef PRODUCT
// Debug listing of the unverified entry point (inline cache check).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1181 1202
// Emit the unverified entry point: compare the inline-cache klass (EAX)
// against the receiver's klass (loaded via ECX) and jump to the IC-miss
// stub on mismatch; then pad with nops.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint code_size = cbuf.code_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
}
1199 1220
1200 1221 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1201 1222 return OptoBreakpoint ? 11 : 12;
1202 1223 }
1203 1224
1204 1225
1205 1226 //=============================================================================
// Size reserved for the exception handler stub.
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1214 1235
// Emit exception handler code. Stuff framesize into a register
// and call a VM stub routine.  Returns the handler's offset within the
// stubs section (0 if the CodeBuffer could not be expanded).
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1231 1252
// Size reserved for the deopt handler stub.
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The deopt handler is 'pushl <re-exec address>; jmp <unpack>', so it
  // needs 5 bytes for the push in addition to the jump.
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1240 1261
1241 1262 // Emit deopt handler code.
1242 1263 int emit_deopt_handler(CodeBuffer& cbuf) {
1243 1264
1244 1265 // Note that the code buffer's inst_mark is always relative to insts.
1245 1266 // That's why we must use the macroassembler to generate a handler.
1246 1267 MacroAssembler _masm(&cbuf);
1247 1268 address base =
1248 1269 __ start_a_stub(size_exception_handler());
1249 1270 if (base == NULL) return 0; // CodeBuffer::expand failed
1250 1271 int offset = __ offset();
1251 1272 InternalAddress here(__ pc());
1252 1273 __ pushptr(here.addr());
1253 1274
1254 1275 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1255 1276 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1256 1277 __ end_a_stub();
1257 1278 return offset;
1258 1279 }
1259 1280
1260 1281
// Materialize 'x' via the assembler's double-constant pool and emit a 32-bit
// displacement referencing it, with an internal-word relocation so the
// reference survives code movement.
static void emit_double_constant(CodeBuffer& cbuf, double x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address double_address = __ double_constant(x);
  cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)double_address,
                 internal_word_Relocation::spec(double_address),
                 RELOC_DISP32);
}
1271 1292
// Float analogue of emit_double_constant: materialize 'x' in the constant
// pool and emit a relocated 32-bit displacement to it.
static void emit_float_constant(CodeBuffer& cbuf, float x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address float_address = __ float_constant(x);
  cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)float_address,
                 internal_word_Relocation::spec(float_address),
                 RELOC_DISP32);
}
1282 1303
1283 1304
1284 1305 const bool Matcher::match_rule_supported(int opcode) {
1285 1306 if (!has_match_rule(opcode))
1286 1307 return false;
1287 1308
1288 1309 return true; // Per default match rules are supported.
1289 1310 }
1290 1311
// Map an allocator register number to an FPU-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1294 1315
1295 1316 bool is_positive_zero_float(jfloat f) {
1296 1317 return jint_cast(f) == jint_cast(0.0F);
1297 1318 }
1298 1319
1299 1320 bool is_positive_one_float(jfloat f) {
1300 1321 return jint_cast(f) == jint_cast(1.0F);
1301 1322 }
1302 1323
1303 1324 bool is_positive_zero_double(jdouble d) {
1304 1325 return jlong_cast(d) == jlong_cast(0.0);
1305 1326 }
1306 1327
1307 1328 bool is_positive_one_double(jdouble d) {
1308 1329 return jlong_cast(d) == jlong_cast(1.0);
1309 1330 }
1310 1331
// True just means we have fast l2f conversion.  (The "UltraSparc specific"
// wording found in other ports is boilerplate; x86 simply answers true.)
const bool Matcher::convL2FSupported(void) {
  return true;
}
1315 1336
// Vector width in bytes: 8-byte vectors when SSE2 is available, otherwise no
// vector support.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}
1320 1341
// Vector ideal reg: an 8-byte vector occupies a double register.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1325 1346
1326 1347 // Is this branch offset short enough that a short branch can be used?
1327 1348 //
1328 1349 // NOTE: If the platform does not provide any short branch variants, then
1329 1350 // this method should return false for offset 0.
1330 1351 bool Matcher::is_short_branch_offset(int rule, int offset) {
1331 1352 // the short version of jmpConUCF2 contains multiple branches,
1332 1353 // making the reach slightly less
1333 1354 if (rule == jmpConUCF2_rule)
1334 1355 return (-126 <= offset && offset <= 125);
1335 1356 return (-128 <= offset && offset <= 127);
1336 1357 }
1337 1358
// On 32-bit x86 a 64-bit constant store is never "simple".
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1342 1363
// ---- Platform characterization constants consumed by the matcher. ----

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1365 1386
1366 1387
// Replace the memory operand that owns input leaf 'idx' with its
// "win95 safe" variant, which restricts the registers the addressing mode
// may use.  NOTE(review): presumably required because some addressing forms
// can fault outside the guarded page on win95-family OSes — confirm against
// the *_win95_safeOper definitions.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node: walk the operand list summing each
  // operand's edge count until the one covering input 'idx' is found.
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1416 1437
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode (the x87 unit computes with extra
// precision, hence 'true' here).
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1425 1446
1426 1447 // Return whether or not this register is ever used as an argument. This
1427 1448 // function is used on startup to build the trampoline stubs in generateOptoStub.
1428 1449 // Registers not mentioned will be killed by the VM call in the trampoline, and
1429 1450 // arguments in those registers not be available to the callee.
1430 1451 bool Matcher::can_be_java_arg( int reg ) {
1431 1452 if( reg == ECX_num || reg == EDX_num ) return true;
1432 1453 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1433 1454 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1434 1455 return false;
1435 1456 }
1436 1457
// A register is a spillable argument exactly when it can carry a Java
// argument (delegates to can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1440 1461
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;   // x86 IDIV leaves the quotient in EAX
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;   // x86 IDIV leaves the remainder in EDX
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // A fused long divmod is never matched on ia32, so this mask is
  // never requested.
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  // See divL_proj_mask: long divmod is not matched on this platform.
  ShouldNotReachHere();
  return RegMask();
}
1462 1483
// Register used to preserve SP across MethodHandle invokes.
// EBP is callee-saved across all calls (see the preserve_SP/restore_SP
// encodings in this file), so it can hold the caller's SP while a
// method handle call site may modify the stack pointer.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1463 1488 %}
1464 1489
1465 1490 //----------ENCODING BLOCK-----------------------------------------------------
1466 1491 // This block specifies the encoding classes used by the compiler to output
1467 1492 // byte streams. Encoding classes generate functions which are called by
1468 1493 // Machine Instruction Nodes in order to generate the bit encoding of the
1469 1494 // instruction. Operands specify their base encoding interface with the
1470 1495 // interface keyword. There are currently supported four interfaces,
1471 1496 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1472 1497 // operand to generate a function which returns its register number when
1473 1498 // queried. CONST_INTER causes an operand to generate a function which
1474 1499 // returns the value of the constant when queried. MEMORY_INTER causes an
1475 1500 // operand to generate four functions which return the Base Register, the
1476 1501 // Index Register, the Scale Value, and the Offset Value of the operand when
1477 1502 // queried. COND_INTER causes an operand to generate six functions which
1478 1503 // return the encoding code (ie - encoding bits for the instruction)
1479 1504 // associated with each basic boolean condition for a conditional instruction.
1480 1505 // Instructions specify two basic values for encoding. They use the
1481 1506 // ins_encode keyword to specify their encoding class (which must be one of
1482 1507 // the class names specified in the encoding block), and they use the
1483 1508 // opcode keyword to specify, in order, their primary, secondary, and
1484 1509 // tertiary opcode. Only the opcode sections which a particular instruction
1485 1510 // needs for encoding need to be specified.
1486 1511 encode %{
1487 1512 // Build emit functions for each basic byte or larger field in the intel
1488 1513 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1489 1514 // code in the enc_class source block. Emit functions will live in the
1490 1515 // main source block for now. In future, we can generalize this by
1491 1516 // adding a syntax that specifies the sizes of fields in an order,
1492 1517 // so that the adlc can build the emit functions automagically
1493 1518
  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // mod/rm byte for a register-register form (mod == 0x3)
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode byte followed by a register-register mod/rm byte
  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 -- load immediate zero into a register
  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}
1526 1551
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                           -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1        ; divisor == -1 ?
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    // NOTE(review): 83 F9 FF is cmp ecx with the sign-extended imm8 0xFF,
    // i.e. -1 — the min_int / -1 overflow special case of the JVM spec.
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1567 1592
  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Same as OpcSE, plus an r/m byte using the secondary opcode as the
  // reg/opcode selector field
  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself: one byte if it fits in a signed byte,
  // otherwise four bytes
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long op-with-immediate: opcode + r/m + imm8/imm32
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long op-with-immediate: uses the tertiary opcode
  // selector and the high register of the pair
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1629 1654
  // 32-bit displacement to a label (JMP/CALL rel32); emits 0 when the
  // label is not yet bound (patched later)
  enc_class Lbl (label labl) %{    // JMP, CALL
    Label *l = $labl$$label;
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // 8-bit displacement to a label (short JMP); must fit in a signed byte
  enc_class LblShort (label labl) %{    // JMP, CALL
    Label *l = $labl$$label;
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Opcode with the register encoded in its low 3 bits (e.g. BSWAP r32)
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a long: BSWAP each 32-bit half, then XCHG the halves
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Single register operand selected via the secondary opcode field
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // Conditional jump with 32-bit displacement
  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // Conditional jump with 8-bit displacement
  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}
1678 1703
  // CMOVcc: primary opcode byte, then condition folded into second byte
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOVcc: condition code and FP stack slot folded into one
  // two-byte opcode starting at 0xDA00
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path portion of a partial subtype check; delegates to
  // MacroAssembler::check_klass_subtype_slow_path with fixed registers.
  // When $primary is set, EDI is zeroed before the miss label —
  // NOTE(review): confirm the intended result convention against the
  // instruct rules that use this encoding.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1713 1738
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the size of this sequence the first time it is emitted, and
    // assert every later emission has the same size (instruction sizing
    // must be stable).
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optionally (-XX:+VerifyFPU) check the x87 stack state when returning
  // from a runtime leaf call
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1739 1764
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      // C runtime calls return FP values on the x87 stack; in SSE2+ mode
      // the compiled caller expects them in xmm0, so transfer (or discard)
      // the x87 result here.
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Pop the x87 result through a 4-byte stack temp into xmm0
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Pop the x87 result through an 8-byte stack temp into xmm0
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}
1771 1796
1772 1797
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.code_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Emitted size must match pre_call_FPU_size() so call-site sizing
    // stays consistent (checked in debug builds only).
    debug_only(int off1 = cbuf.code_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.code_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp, rsp);
    // Size must match preserve_SP_size() (debug-only check).
    debug_only(int off1 = cbuf.code_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore the stack pointer saved by preserve_SP above.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp);
  %}
1831 +
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    if ( !_method ) {
      // Not a Java method: plain runtime call relocation
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_inst_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}
1836 1879
  // XOR a register with itself — the canonical "zero a register" idiom
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    //  JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);         // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_inst_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}
1867 1910
  // Shift by an 8-bit immediate: opcode, r/m (secondary selects the
  // shift kind), then the imm8 count
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; loading 0 is
  // strength-reduced to XOR dst,dst
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high
  // register.  NOTE(review): the high half is addressed as lo+2 here
  // rather than via HIGH_FROM_LOW — presumably matching the register
  // file layout in this file; confirm against the register definitions.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1917 1960
1918 1961
  // Load a double constant onto the x87 stack; +0.0 and +1.0 use the
  // dedicated FLDZ/FLD1 opcodes, everything else loads from memory
  enc_class LdImmD (immD src) %{    // Load Immediate
    if( is_positive_zero_double($src$$constant)) {
      // FLDZ
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_double($src$$constant)) {
      // FLD1
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      emit_opcode(cbuf,0xDD);
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_double_constant(cbuf, $src$$constant);
    }
  %}


  // Load a float constant onto the x87 stack; same FLDZ/FLD1 shortcuts
  enc_class LdImmF (immF src) %{    // Load Immediate
    if( is_positive_zero_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      $$$emit8$primary;
      // Load immediate does not have a zero or sign extended version
      // for 8-bit immediates
      // First load to TOS, then move to dst
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_float_constant(cbuf, $src$$constant);
    }
  %}

  // Load a float constant into an XMM register ([disp32] operand form)
  enc_class LdImmX (regX dst, immXF con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_float_constant(cbuf, $con$$constant);
  %}

  // Load a double constant into an XMM register ([disp32] operand form)
  enc_class LdImmXD (regXD dst, immXD con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  // Opcode with a float-constant memory operand ([disp32] form);
  // inst mark is set for relocation of the constant address
  enc_class Opc_MemImm_F(immF src) %{
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x0, $secondary, 0x5);
    emit_float_constant(cbuf, $src$$constant);
  %}
1978 2021
1979 2022
  // MOVD: 32-bit GPR -> low dword of an XMM register
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOVD: low dword of an XMM register -> 32-bit GPR
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Assemble a long register pair into one XMM register:
  // MOVD the lo and hi halves, then interleave with PUNPCKLDQ
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
    }
  %}

  // Split an XMM register into a long register pair: MOVD the low dword,
  // swap dwords with PSHUFLW into tmp, MOVD the (former) high dword
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
2036 2079
2037 2080
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low half of a long pair into an int register
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long-pair variants: _Lo/_Hi emit the primary/secondary opcode plus
  // the low/high registers; _Lo2/_Hi2 emit only the mod/rm byte
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // mod/rm pairing an int register with the high half of a long pair
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2077 2120
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  // Emit a float constant as its raw 32-bit pattern
  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute [disp32] memory reference with a zero displacement
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix (0xF0), only needed on multiprocessor machines
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2116 2159
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-exchange of a 32-bit value at [mem_ptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Turn a preceding compare's ZF into a 0/1 value: res = 0, then a
  // short JNE skips the 5-byte "MOV res,1" when ZF is clear
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}
2164 2207
  enc_class set_instruction_start( ) %{
    cbuf.set_inst_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Generic reg,mem form: mod/rm (+sib/disp) for an int register and a
  // memory operand; the displacement may carry an oop needing relocation
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Same as RegMem but addresses the high 32-bit half of a long in
  // memory (disp+4); an oop displacement cannot be offset, hence the assert
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2188 2231
  // Shift a long by 1..31: a double shift (SHLD/SHRD, selected by
  // $tertiary) moves bits between the halves, then the primary opcode
  // shifts the remaining half by the same count
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift
  // lo by (cnt-32), then replicate the sign bit into hi by shifting 31
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift it
  // by (cnt-32) if non-zero, and clear the vacated half with XOR
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2228 2271
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant; // 0 = low half, 4 = high half
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // Same as RMopc_Mem_no_oop except the displacement is allowed to be an oop
  // (requires relocation info when addressing static globals).
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // LEA dst,[src0+src1]: reuses the reg/mem encoder with no index and no scale.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2276 2319
  // dst = min(dst,src): compare, then conditionally skip over a 2-byte MOV.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL rel8, skipping the 2-byte MOV below)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): same shape as min_enc, with JG instead of JL.
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG rel8, skipping the 2-byte MOV below)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2300 2343
  // Store FPU register 'src' to memory.
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  enc_class enc_FP_store(memory mem, regD src) %{
    int reg_encoding = 0x2; // Just store (FST)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop (FSTP)
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Two's-complement negate of an integer register.
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set dst's low byte to 1 if the last compare was signed less-than.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2332 2375
  // Branch-free p = (p < q) ? p-q+y : p-q.  SBB materializes the borrow as
  // an all-ones/all-zeros mask in tmp, which then gates the add of y.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = 0 or -1 depending on the borrow)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Same as enc_cmpLTP but the masked addend comes from memory instead of a
  // register.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = 0 or -1 depending on the borrow)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,[$mem]  (memory form of the $y operand above)
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2373 2416
  // Variable shift left of a long by CL.  The hardware ignores bit 5 of the
  // count, so counts >= 32 are handled explicitly: move lo into hi, clear lo,
  // then let SHLD/SHL apply the remaining (count mod 32) bits.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4-byte MOV+CLR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical shift right of a long by CL; mirror image of
  // shift_left_long (hi moves into lo, hi is cleared, then SHRD/SHR).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4-byte MOV+CLR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic shift right of a long by CL; like shift_right_long
  // but the hi half is sign-filled (SAR hi,31) instead of cleared.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 5-byte MOV+SAR pair)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2446 2489
2447 2490
  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes

  // FPU arithmetic with a register operand (FMUL, FDIV, ...): the opcode
  // comes from $primary, the /r extension from $secondary.
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );  // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack.
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply TOS by an 80-bit bias constant to force strictfp rounding
  // behavior (first of a bias/unbias pair).
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Second half of the strictfp bias pair: multiply by the inverse bias.
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2487 2530
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}

  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Store TOS into FPU register dst and pop the FPU stack.
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack.
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2536 2579
  // Push FPU's float to a stack-slot, and pop FPU-stack
  // If src is already FPR1 a plain FST suffices; otherwise FLD it first and
  // use the popping store (FSTP) to rebalance the FPU stack.
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;              // /2 = FST (no pop)
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                // /3 = FSTP (store & pop)
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  // Same shape as Pop_Mem_Reg_F with the 64-bit store opcode.
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD (0xD0+i = FST ST(i))
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 ); // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;       // 0xD8+i = FSTP ST(i)
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
  %}
2570 2613
2571 2614
  // dst = src + src1*src2, computed on the FPU stack via MacroAssembler.
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0);   // value at TOS
    masm.fadd( $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);  // value at TOS, popped off after store
  %}

  // Load dst to TOS for an FPREM-style op, arranging for src to sit in FPR1.
  // The fincstp/fxch/fdecstp dance swaps src into FPR1 without disturbing
  // the rest of the FPU stack.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2598 2641
  // Move two XMM doubles onto the FPU stack (src1 then src0) by bouncing each
  // through an 8-byte scratch area below ESP.  Used for FPU-only ops (e.g.
  // DREM) whose operands live in XMM registers.
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate 8 bytes of scratch (a qword, not a single word)
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Single-precision variant of Push_ModD_encoding: 4-byte scratch, MOVSS/FLD.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate a word of scratch
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2646 2689
  // Move the FPU TOS double into XMM register dst via the scratch qword at
  // [ESP], then release the scratch space.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Single-precision variant: FSTP_S to [ESP], MOVSS into dst, then release
  // d8 bytes of scratch (4 or 8, supplied by the instruction).
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Push a single XMM double onto the FPU stack via an 8-byte scratch area.
  enc_class Push_SrcXD(regXD src) %{
    // Allocate 8 bytes of scratch
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Reserve an 8-byte scratch area below ESP.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}

  // Release the 8-byte scratch area reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}

  // Copy xmm_src into FPR1 via the scratch qword at [ESP] (MOVSD + FLD_D).
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2710 2753
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  // Splits Q = Y*lg2(X) into integer and fractional parts; F2XM1 handles the
  // fraction, and 2^int(Q) is built by hand in the double's exponent field.
  // Requires an 8-byte scratch qword at [ESP]; clobbers EAX, EBX, ECX.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov eax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov ecx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add eax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov ebx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl eax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test ebx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne eax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul st(0),[esp+0] (m64real); FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2749 2792
  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

  // Rotate the FPREM result out of FPR1 when src was not FPR1, using the
  // fincstp/fxch/fdecstp swap; the actual store is done by a following
  // Pop_Reg_F/Pop_Mem_F encoding.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to EFLAGS and branch past 5 bytes when the parity flag
  // (unordered-compare indicator) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip  (JNP rel8 over the following 5 bytes)
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2781 2824
  // FPREM loop for remainder: FPREM only does partial reduction, so repeat
  // until the FPU status C2 (incomplete) flag — surfaced as the parity flag
  // after SAHF — is clear.
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (JP rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Materialize FPU compare status into EFLAGS, folding the unordered (NaN)
  // case into "less than" by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2826 2869
  // After a P6 FCOMI-style compare: fold a NaN result (parity set) into the
  // "less than" outcome so integer branches behave sanely.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2841 2884
  // Pseudocode for CmpF_Result below:
  //   fnstsw_ax();
  //   sahf();
  //   movl(dst, nan_result);
  //   jcc(Assembler::parity, exit);
  //   movl(dst, less_result);
  //   jcc(Assembler::below, exit);
  //   movl(dst, equal_result);
  //   jcc(Assembler::equal, exit);
  //   movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  // Produce the three-way compare result (-1/0/1) of an FPU compare in an
  // integer register; NaN compares yield -1.  The jcc displacements (0x13,
  // 0x0C, 0x05) each skip the remaining MOV+Jcc pairs to a common exit.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2885 2928
2886 2929
  // XMM version of CmpF_Result. Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.  Expects dst pre-loaded with 0; bumps it to
  // +1/-1 for greater/less (NaN falls into the less case).
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);    // unordered (NaN) -> treat as less
    __ jccb(Assembler::equal,  done);   // equal -> dst stays 0
    __ jccb(Assembler::above,  inc);    // greater -> +1
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}
2904 2947
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // NOTE(review): presumably broken because when the hi halves differ, the
  // signed condition codes of a single 32-bit CMP do not encode the ordering
  // of the full 64-bit values — TODO confirm before re-enabling.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy into both
  // halves, then arithmetic-shift the hi half by 31 to spread the sign.
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2932 2975
  // Convert a long register pair to double: push both halves, FILD the
  // 64-bit value from [ESP], then pop the two words back off the stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Multiply int src1 into EDX:EAX, then shift the high word right by
  // (cnt-32) to extract the upper bits of the product into dst (EDX).
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // (the caller is expected to release the two pushed words itself)
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2974 3017
  // Signed 32x32->64 multiply: one-operand IMUL leaves the product in EDX:EAX.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Unsigned 32x32->64 multiply: one-operand MUL leaves the product in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2988 3031
  // Full 64x64->64 multiply on a 32-bit machine, result in EDX:EAX.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (comment historically said ESI; tmp is whatever the
    //                   allocator picked)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (8-bit) constant, result in EDX:EAX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp  (comment historically said ESI; tmp is allocator-chosen)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3032 3075
  // 64-bit signed division via a C runtime call: push both longs as
  // hi/lo 32-bit halves, call SharedRuntime::ldiv, then pop the four
  // words.  NOTE(review): the result register convention is defined by
  // the runtime call ABI -- presumably EDX:EAX; confirm against
  // SharedRuntime::ldiv.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi   (0x50+reg is the one-byte PUSH r32 encoding)
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime (mark needed for relocation)
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // four 32-bit words were pushed above
  %}
3051 3094
  // 64-bit signed remainder via a C runtime call; identical argument
  // marshalling to long_div but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime (mark needed for relocation)
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // four 32-bit words were pushed above
  %}
3070 3113
  // Compare a long against zero for equality: OR the two halves into
  // tmp so ZF=1 iff src == 0.  Only the Z flag result is meaningful.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV  $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR   $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3079 3122
  // Equality compare of two longs: compare the low words, and only if
  // they match fall through to compare the high words.  The short JNE
  // skips the 2-byte second CMP, leaving ZF=0 from the first compare.
  // Only EQ/NE are valid from the resulting flags.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP  $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP  $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3091 3134
  // Signed ordered compare of two longs via the classic CMP/SBB idiom:
  // the SBB of the high words consumes the borrow from the low-word CMP,
  // producing flags as if a full 64-bit subtract had been performed.
  // tmp is clobbered to hold the high-word difference.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP  $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV  $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB  $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3103 3146
  // Signed compare of a long against zero: compute flags for (0 - src)
  // with the CMP/SBB borrow-chain idiom, using tmp zeroed as the left
  // operand.  tmp is clobbered.
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR  $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP  $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB  $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3115 3158
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair in three ops:
  //   NEG hi ; NEG lo ; SBB hi,0
  // The trailing SBB borrows 1 from the high word exactly when the low
  // word was non-zero, yielding the correct 64-bit -dst.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3126 3169
  // Load 64 bits from memory into the low half of an XMM register (MOVQ).
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3131 3174
  // Store the low 64 bits of an XMM register to memory (MOVQ).
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3136 3179
  // Replicate the low byte of src across the low 8 bytes of dst:
  // copy src->dst, PUNPCKLBW dst,dst doubles each low byte in place
  // (b0 b0 b1 b1 ...), then PSHUFLW with imm 0x00 broadcasts word 0
  // (= b0 b0) across all four low words.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3144 3187
  // Broadcast the low 16-bit word of src across the four low words of
  // dst (PSHUFLW with shuffle-control 0x00 selects word 0 four times).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3150 3193
  // Generic 32-bit dword shuffle: PSHUFD dst, src, mode where mode is
  // the immediate shuffle-control byte supplied by the instruct rule.
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3156 3199
  // Bitwise XOR of two XMM registers (PXOR); dst ^= src.
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3162 3205
  // Move a 32-bit GPR into the low dword of an XMM register (MOVD);
  // the upper 96 bits of dst are zeroed by the instruction.
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3168 3211
3169 3212
3170 3213 // Because the transitions from emitted code to the runtime
3171 3214 // monitorenter/exit helper stubs are so slow it's critical that
3172 3215 // we inline both the stack-locking fast-path and the inflated fast path.
3173 3216 //
3174 3217 // See also: cmpFastLock and cmpFastUnlock.
3175 3218 //
3176 3219 // What follows is a specialized inline transliteration of the code
3177 3220 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3178 3221 // another option would be to emit TrySlowEnter and TrySlowExit methods
3179 3222 // at startup-time. These methods would accept arguments as
// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3181 3224 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3182 3225 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3183 3226 // In practice, however, the # of lock sites is bounded and is usually small.
3184 3227 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
3188 3231 //
3189 3232 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3190 3233 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3191 3234 // to those specialized methods. That'd give us a mostly platform-independent
3192 3235 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
3194 3237 // to park() or unpark() threads. We'd also need a few more unsafe operators
3195 3238 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3196 3239 // (b) explicit barriers or fence operations.
3197 3240 //
3198 3241 // TODO:
3199 3242 //
3200 3243 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3201 3244 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3202 3245 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3203 3246 // the lock operators would typically be faster than reifying Self.
3204 3247 //
3205 3248 // * Ideally I'd define the primitives as:
3206 3249 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3207 3250 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3208 3251 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3209 3252 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3210 3253 // Furthermore the register assignments are overconstrained, possibly resulting in
3211 3254 // sub-optimal code near the synchronization site.
3212 3255 //
3213 3256 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3214 3257 // Alternately, use a better sp-proximity test.
3215 3258 //
3216 3259 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3217 3260 // Either one is sufficient to uniquely identify a thread.
3218 3261 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3219 3262 //
3220 3263 // * Intrinsify notify() and notifyAll() for the common cases where the
3221 3264 // object is locked by the calling thread but the waitlist is empty.
3222 3265 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3223 3266 //
3224 3267 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3225 3268 // But beware of excessive branch density on AMD Opterons.
3226 3269 //
3227 3270 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3228 3271 // or failure of the fast-path. If the fast-path fails then we pass
3229 3272 // control to the slow-path, typically in C. In Fast_Lock and
3230 3273 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3231 3274 // will emit a conditional branch immediately after the node.
3232 3275 // So we have branches to branches and lots of ICC.ZF games.
3233 3276 // Instead, it might be better to have C2 pass a "FailureLabel"
3234 3277 // into Fast_Lock and Fast_Unlock. In the case of success, control
3235 3278 // will drop through the node. ICC.ZF is undefined at exit.
3236 3279 // In the case of failure, the node will branch directly to the
3237 3280 // FailureLabel
3238 3281
3239 3282
3240 3283 // obj: object to lock
3241 3284 // box: on-stack box address (displaced header location) - KILLED
3242 3285 // rax,: tmp -- KILLED
3243 3286 // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
    // Inline monitor-enter fast path.  On exit ICC.ZF == 1 means the lock
    // was acquired; ZF == 0 forces control through the slow path (see the
    // protocol comment at DONE_LABEL).  EmitSync is a diagnostic flag
    // whose bits select alternative emission strategies.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                     // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);         // Anticipate successful CAS
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));     // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking: success iff the markword (now in tmpReg) is an
      // address within our own stack frame.
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).  We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;           // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;               // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;           // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                     // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                  // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;           // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;               // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3474 3517
3475 3518 // obj: object to unlock
3476 3519 // box: box address (displaced header location), killed. Must be EAX.
3477 3520 // rbx,: killed tmp; cannot be obj nor box.
3478 3521 //
3479 3522 // Some commentary on balanced locking:
3480 3523 //
3481 3524 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3482 3525 // Methods that don't have provably balanced locking are forced to run in the
3483 3526 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3484 3527 // The interpreter provides two properties:
3485 3528 // I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
3487 3530 // interpreter maintains an on-stack list of locks currently held by
3488 3531 // a frame.
// I2:  If a method attempts to unlock an object that is not held by
//      the frame, the interpreter throws IMSX.
3491 3534 //
3492 3535 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3493 3536 // B() doesn't have provably balanced locking so it runs in the interpreter.
3494 3537 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3495 3538 // is still locked by A().
3496 3539 //
3497 3540 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3498 3541 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3499 3542 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3500 3543 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3501 3544
3502 3545 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3503 3546
3504 3547 Register objReg = as_Register($obj$$reg);
3505 3548 Register boxReg = as_Register($box$$reg);
3506 3549 Register tmpReg = as_Register($tmp$$reg);
3507 3550
3508 3551 guarantee (objReg != boxReg, "") ;
3509 3552 guarantee (objReg != tmpReg, "") ;
3510 3553 guarantee (boxReg != tmpReg, "") ;
3511 3554 guarantee (boxReg == as_Register(EAX_enc), "") ;
3512 3555 MacroAssembler masm(&cbuf);
3513 3556
3514 3557 if (EmitSync & 4) {
3515 3558 // Disable - inhibit all inlining. Force control through the slow-path
3516 3559 masm.cmpptr (rsp, 0) ;
3517 3560 } else
3518 3561 if (EmitSync & 8) {
3519 3562 Label DONE_LABEL ;
3520 3563 if (UseBiasedLocking) {
3521 3564 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3522 3565 }
3523 3566 // classic stack-locking code ...
3524 3567 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3525 3568 masm.testptr(tmpReg, tmpReg) ;
3526 3569 masm.jcc (Assembler::zero, DONE_LABEL) ;
3527 3570 if (os::is_MP()) { masm.lock(); }
3528 3571 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3529 3572 masm.bind(DONE_LABEL);
3530 3573 } else {
3531 3574 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3532 3575
3533 3576 // Critically, the biased locking test must have precedence over
3534 3577 // and appear before the (box->dhw == 0) recursive stack-lock test.
3535 3578 if (UseBiasedLocking && !UseOptoBiasInlining) {
3536 3579 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3537 3580 }
3538 3581
3539 3582 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3540 3583 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3541 3584 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3542 3585
3543 3586 masm.testptr(tmpReg, 0x02) ; // Inflated?
3544 3587 masm.jccb (Assembler::zero, Stacked) ;
3545 3588
3546 3589 masm.bind (Inflated) ;
3547 3590 // It's inflated.
3548 3591 // Despite our balanced locking property we still check that m->_owner == Self
3549 3592 // as java routines or native JNI code called by this thread might
3550 3593 // have released the lock.
3551 3594 // Refer to the comments in synchronizer.cpp for how we might encode extra
3552 3595 // state in _succ so we can avoid fetching EntryList|cxq.
3553 3596 //
3554 3597 // I'd like to add more cases in fast_lock() and fast_unlock() --
3555 3598 // such as recursive enter and exit -- but we have to be wary of
3556 3599 // I$ bloat, T$ effects and BP$ effects.
3557 3600 //
3558 3601 // If there's no contention try a 1-0 exit. That is, exit without
3559 3602 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3560 3603 // we detect and recover from the race that the 1-0 exit admits.
3561 3604 //
3562 3605 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3563 3606 // before it STs null into _owner, releasing the lock. Updates
3564 3607 // to data protected by the critical section must be visible before
3565 3608 // we drop the lock (and thus before any other thread could acquire
3566 3609 // the lock and observe the fields protected by the lock).
3567 3610 // IA32's memory-model is SPO, so STs are ordered with respect to
3568 3611 // each other and there's no need for an explicit barrier (fence).
3569 3612 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3570 3613
3571 3614 masm.get_thread (boxReg) ;
3572 3615 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3573 3616 // prefetchw [ebx + Offset(_owner)-2]
3574 3617 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3575 3618 }
3576 3619
3577 3620 // Note that we could employ various encoding schemes to reduce
3578 3621 // the number of loads below (currently 4) to just 2 or 3.
3579 3622 // Refer to the comments in synchronizer.cpp.
3580 3623 // In practice the chain of fetches doesn't seem to impact performance, however.
3581 3624 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3582 3625 // Attempt to reduce branch density - AMD's branch predictor.
3583 3626 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3584 3627 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3585 3628 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3586 3629 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3587 3630 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3588 3631 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3589 3632 masm.jmpb (DONE_LABEL) ;
3590 3633 } else {
3591 3634 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3592 3635 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3593 3636 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3594 3637 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3595 3638 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3596 3639 masm.jccb (Assembler::notZero, CheckSucc) ;
3597 3640 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3598 3641 masm.jmpb (DONE_LABEL) ;
3599 3642 }
3600 3643
3601 3644 // The Following code fragment (EmitSync & 65536) improves the performance of
3602 3645 // contended applications and contended synchronization microbenchmarks.
3603 3646 // Unfortunately the emission of the code - even though not executed - causes regressions
3604 3647 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3605 3648 // with an equal number of never-executed NOPs results in the same regression.
3606 3649 // We leave it off by default.
3607 3650
3608 3651 if ((EmitSync & 65536) != 0) {
3609 3652 Label LSuccess, LGoSlowPath ;
3610 3653
3611 3654 masm.bind (CheckSucc) ;
3612 3655
3613 3656 // Optional pre-test ... it's safe to elide this
3614 3657 if ((EmitSync & 16) == 0) {
3615 3658 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3616 3659 masm.jccb (Assembler::zero, LGoSlowPath) ;
3617 3660 }
3618 3661
3619 3662 // We have a classic Dekker-style idiom:
3620 3663 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3621 3664 // There are a number of ways to implement the barrier:
3622 3665 // (1) lock:andl &m->_owner, 0
3623 3666 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3624 3667 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3625 3668 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3626 3669 // (2) If supported, an explicit MFENCE is appealing.
3627 3670 // In older IA32 processors MFENCE is slower than lock:add or xchg
3628 3671 // particularly if the write-buffer is full as might be the case if
3629 3672 // if stores closely precede the fence or fence-equivalent instruction.
3630 3673 // In more modern implementations MFENCE appears faster, however.
3631 3674 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3632 3675 // The $lines underlying the top-of-stack should be in M-state.
3633 3676 // The locked add instruction is serializing, of course.
3634 3677 // (4) Use xchg, which is serializing
3635 3678 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3636 3679 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3637 3680 // The integer condition codes will tell us if succ was 0.
3638 3681 // Since _succ and _owner should reside in the same $line and
3639 3682 // we just stored into _owner, it's likely that the $line
3640 3683 // remains in M-state for the lock:orl.
3641 3684 //
3642 3685 // We currently use (3), although it's likely that switching to (2)
3643 3686 // is correct for the future.
3644 3687
3645 3688 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3646 3689 if (os::is_MP()) {
3647 3690 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3648 3691 masm.mfence();
3649 3692 } else {
3650 3693 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3651 3694 }
3652 3695 }
3653 3696 // Ratify _succ remains non-null
3654 3697 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3655 3698 masm.jccb (Assembler::notZero, LSuccess) ;
3656 3699
3657 3700 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3658 3701 if (os::is_MP()) { masm.lock(); }
3659 3702 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3660 3703 masm.jccb (Assembler::notEqual, LSuccess) ;
3661 3704 // Since we're low on registers we installed rsp as a placeholding in _owner.
3662 3705 // Now install Self over rsp. This is safe as we're transitioning from
3663 3706     // non-null to non-null
3664 3707 masm.get_thread (boxReg) ;
3665 3708 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3666 3709 // Intentional fall-through into LGoSlowPath ...
3667 3710
3668 3711 masm.bind (LGoSlowPath) ;
3669 3712 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3670 3713 masm.jmpb (DONE_LABEL) ;
3671 3714
3672 3715 masm.bind (LSuccess) ;
3673 3716 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3674 3717 masm.jmpb (DONE_LABEL) ;
3675 3718 }
3676 3719
3677 3720 masm.bind (Stacked) ;
3678 3721 // It's not inflated and it's not recursively stack-locked and it's not biased.
3679 3722 // It must be stack-locked.
3680 3723 // Try to reset the header to displaced header.
3681 3724 // The "box" value on the stack is stable, so we can reload
3682 3725 // and be assured we observe the same value as above.
3683 3726 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3684 3727 if (os::is_MP()) { masm.lock(); }
3685 3728 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3686 3729     // Intentional fall-thru into DONE_LABEL
3687 3730
3688 3731
3689 3732 // DONE_LABEL is a hot target - we'd really like to place it at the
3690 3733 // start of cache line by padding with NOPs.
3691 3734 // See the AMD and Intel software optimization manuals for the
3692 3735 // most efficient "long" NOP encodings.
3693 3736 // Unfortunately none of our alignment mechanisms suffice.
3694 3737 if ((EmitSync & 65536) == 0) {
3695 3738 masm.bind (CheckSucc) ;
3696 3739 }
3697 3740 masm.bind(DONE_LABEL);
3698 3741
3699 3742 // Avoid branch to branch on AMD processors
3700 3743 if (EmitSync & 32768) { masm.nop() ; }
3701 3744 }
3702 3745 %}
3703 3746
3704 3747
            // Emit a single-byte POP EDX (opcode 0x5A), used to discard the
            // high word of a long value from the CPU stack.
3705 3748   enc_class enc_pop_rdx() %{
3706 3749     emit_opcode(cbuf,0x5A);        // POP EDX
3707 3750   %}
3708 3751
            // Emit a relocated 5-byte relative JMP to the rethrow stub.
            // The displacement is the stub address minus the end of the
            // 4-byte immediate (code_end + 4 relative addressing).
3709 3752   enc_class enc_rethrow() %{
3710 3753     cbuf.set_inst_mark();          // mark start so the reloc covers this instruction
3711 3754     emit_opcode(cbuf, 0xE9);        // jmp    entry
3712 3755     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4,
3713 3756                    runtime_call_Relocation::spec(), RELOC_IMM32 );
3714 3757   %}
3715 3758
3716 3759
3717 3760   // Convert a double to an int.  Java semantics require we do complex
3718 3761   // manglelations in the corner cases.  So we set the rounding mode to
3719 3762   // 'zero', store the darned double down as an int, and reset the
3720 3763   // rounding mode to 'nearest'.  The hardware throws an exception which
3721 3764   // patches up the correct value directly to the stack.
3722 3765   enc_class D2I_encoding( regD src ) %{
3723 3766     // Flip to round-to-zero mode.  We attempted to allow invalid-op
3724 3767     // exceptions here, so that a NAN or other corner-case value will
3725 3768     // thrown an exception (but normal values get converted at full speed).
3726 3769     // However, I2C adapters and other float-stack manglers leave pending
3727 3770     // invalid-op exceptions hanging.  We would have to clear them before
3728 3771     // enabling them and that is more expensive than just testing for the
3729 3772     // invalid value Intel stores down in the corner cases.
3730 3773     emit_opcode(cbuf,0xD9);         // FLDCW  trunc
3731 3774     emit_opcode(cbuf,0x2D);
3732 3775     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3733 3776     // Allocate a word
3734 3777     emit_opcode(cbuf,0x83);         // SUB ESP,4
3735 3778     emit_opcode(cbuf,0xEC);
3736 3779     emit_d8(cbuf,0x04);
3737 3780     // Encoding assumes a double has been pushed into FPR0.
3738 3781     // Store down the double as an int, popping the FPU stack
3739 3782     emit_opcode(cbuf,0xDB);         // FISTP [ESP]
3740 3783     emit_opcode(cbuf,0x1C);
3741 3784     emit_d8(cbuf,0x24);
3742 3785     // Restore the rounding mode; mask the exception
3743 3786     emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
3744 3787     emit_opcode(cbuf,0x2D);
3745 3788     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3746 3789         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3747 3790         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3748 3791 
              // 0x80000000 is the x87 "integer indefinite" value stored on
              // overflow/NaN; only in that case do we fall into the slow call.
3749 3792     // Load the converted int; adjust CPU stack
3750 3793     emit_opcode(cbuf,0x58);       // POP EAX
3751 3794     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3752 3795     emit_d32   (cbuf,0x80000000); //         0x80000000
3753 3796     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3754 3797     emit_d8    (cbuf,0x07);       // Size of slow_call (7 bytes: 2-byte FLD + 5-byte CALL)
3755 3798     // Push src onto stack slow-path
3756 3799     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3757 3800     emit_d8    (cbuf,0xC0-1+$src$$reg );
3758 3801     // CALL directly to the runtime
3759 3802     cbuf.set_inst_mark();
3760 3803     emit_opcode(cbuf,0xE8);       // Call into runtime
3761 3804     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3762 3805     // Carry on here...
3763 3806   %}
3764 3807
            // Convert a double (in FPR0) to a long, same trunc-mode dance as
            // D2I_encoding above but with a 64-bit FISTP and a two-word result
            // popped into EDX:EAX.
3765 3808   enc_class D2L_encoding( regD src ) %{
3766 3809     emit_opcode(cbuf,0xD9);         // FLDCW  trunc
3767 3810     emit_opcode(cbuf,0x2D);
3768 3811     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3769 3812     // Allocate a word
3770 3813     emit_opcode(cbuf,0x83);         // SUB ESP,8
3771 3814     emit_opcode(cbuf,0xEC);
3772 3815     emit_d8(cbuf,0x08);
3773 3816     // Encoding assumes a double has been pushed into FPR0.
3774 3817     // Store down the double as a long, popping the FPU stack
3775 3818     emit_opcode(cbuf,0xDF);         // FISTP [ESP]
3776 3819     emit_opcode(cbuf,0x3C);
3777 3820     emit_d8(cbuf,0x24);
3778 3821     // Restore the rounding mode; mask the exception
3779 3822     emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
3780 3823     emit_opcode(cbuf,0x2D);
3781 3824     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3782 3825         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3783 3826         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3784 3827 
              // Slow path is taken only for the long "integer indefinite"
              // value 0x8000000000000000 (EDX == 0x80000000 and EAX == 0),
              // which FISTP stores on overflow/NaN.
3785 3828     // Load the converted int; adjust CPU stack
3786 3829     emit_opcode(cbuf,0x58);       // POP EAX
3787 3830     emit_opcode(cbuf,0x5A);       // POP EDX
3788 3831     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3789 3832     emit_d8    (cbuf,0xFA);       // rdx
3790 3833     emit_d32   (cbuf,0x80000000); //         0x80000000
3791 3834     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3792 3835     emit_d8    (cbuf,0x07+4);     // Size of slow_call (includes following TEST/JNE)
3793 3836     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3794 3837     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3795 3838     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3796 3839     emit_d8    (cbuf,0x07);       // Size of slow_call
3797 3840     // Push src onto stack slow-path
3798 3841     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3799 3842     emit_d8    (cbuf,0xC0-1+$src$$reg );
3800 3843     // CALL directly to the runtime
3801 3844     cbuf.set_inst_mark();
3802 3845     emit_opcode(cbuf,0xE8);       // Call into runtime
3803 3846     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3804 3847     // Carry on here...
3805 3848   %}
3806 3849
            // Convert an XMM single (regX) to a long.  The XMM value is
            // spilled to the stack, reloaded into the x87 FPU, and converted
            // with a truncating 64-bit FISTP (same pattern as D2L_encoding);
            // overflow/NaN sentinel 0x8000000000000000 diverts to the
            // d2l_wrapper runtime slow path with the float reloaded in FPR0.
3807 3850   enc_class X2L_encoding( regX src ) %{
3808 3851     // Allocate a word
3809 3852     emit_opcode(cbuf,0x83);      // SUB ESP,8
3810 3853     emit_opcode(cbuf,0xEC);
3811 3854     emit_d8(cbuf,0x08);
3812 3855 
3813 3856     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3814 3857     emit_opcode  (cbuf, 0x0F );
3815 3858     emit_opcode  (cbuf, 0x11 );
3816 3859     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3817 3860 
3818 3861     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3819 3862     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3820 3863 
3821 3864     emit_opcode(cbuf,0xD9);      // FLDCW trunc
3822 3865     emit_opcode(cbuf,0x2D);
3823 3866     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3824 3867 
3825 3868     // Encoding assumes a double has been pushed into FPR0.
3826 3869     // Store down the double as a long, popping the FPU stack
3827 3870     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
3828 3871     emit_opcode(cbuf,0x3C);
3829 3872     emit_d8(cbuf,0x24);
3830 3873 
3831 3874     // Restore the rounding mode; mask the exception
3832 3875     emit_opcode(cbuf,0xD9);      // FLDCW std/24-bit mode
3833 3876     emit_opcode(cbuf,0x2D);
3834 3877     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3835 3878         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3836 3879         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3837 3880 
3838 3881     // Load the converted int; adjust CPU stack
3839 3882     emit_opcode(cbuf,0x58);      // POP EAX
3840 3883 
3841 3884     emit_opcode(cbuf,0x5A);      // POP EDX
3842 3885 
              // Sentinel check: slow path only when EDX:EAX == 0x80000000:00000000
3843 3886     emit_opcode(cbuf,0x81);      // CMP EDX,imm
3844 3887     emit_d8    (cbuf,0xFA);      // rdx
3845 3888     emit_d32   (cbuf,0x80000000);// 0x80000000
3846 3889 
3847 3890     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3848 3891     emit_d8    (cbuf,0x13+4);    // Size of slow_call (0x13-byte reload sequence + TEST/JNE)
3849 3892 
3850 3893     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
3851 3894     emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
3852 3895 
3853 3896     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3854 3897     emit_d8    (cbuf,0x13);      // Size of slow_call
3855 3898 
              // Slow path: reload the float into FPR0 and call the runtime.
3856 3899     // Allocate a word
3857 3900     emit_opcode(cbuf,0x83);      // SUB ESP,4
3858 3901     emit_opcode(cbuf,0xEC);
3859 3902     emit_d8(cbuf,0x04);
3860 3903 
3861 3904     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3862 3905     emit_opcode  (cbuf, 0x0F );
3863 3906     emit_opcode  (cbuf, 0x11 );
3864 3907     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3865 3908 
3866 3909     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3867 3910     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3868 3911 
3869 3912     emit_opcode(cbuf,0x83);      // ADD ESP,4
3870 3913     emit_opcode(cbuf,0xC4);
3871 3914     emit_d8(cbuf,0x04);
3872 3915 
3873 3916     // CALL directly to the runtime
3874 3917     cbuf.set_inst_mark();
3875 3918     emit_opcode(cbuf,0xE8);       // Call into runtime
3876 3919     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3877 3920     // Carry on here...
3878 3921   %}
3879 3922
            // Convert an XMM double (regXD) to a long.  Mirrors X2L_encoding
            // but uses MOVSD/FLD_D and an 8-byte stack temp; slow path for the
            // 0x8000000000000000 sentinel reloads the double and calls the
            // d2l_wrapper runtime stub.
3880 3923   enc_class XD2L_encoding( regXD src ) %{
3881 3924     // Allocate a word
3882 3925     emit_opcode(cbuf,0x83);      // SUB ESP,8
3883 3926     emit_opcode(cbuf,0xEC);
3884 3927     emit_d8(cbuf,0x08);
3885 3928 
3886 3929     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
3887 3930     emit_opcode  (cbuf, 0x0F );
3888 3931     emit_opcode  (cbuf, 0x11 );
3889 3932     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3890 3933 
3891 3934     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
3892 3935     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3893 3936 
3894 3937     emit_opcode(cbuf,0xD9);      // FLDCW trunc
3895 3938     emit_opcode(cbuf,0x2D);
3896 3939     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3897 3940 
3898 3941     // Encoding assumes a double has been pushed into FPR0.
3899 3942     // Store down the double as a long, popping the FPU stack
3900 3943     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
3901 3944     emit_opcode(cbuf,0x3C);
3902 3945     emit_d8(cbuf,0x24);
3903 3946 
3904 3947     // Restore the rounding mode; mask the exception
3905 3948     emit_opcode(cbuf,0xD9);      // FLDCW std/24-bit mode
3906 3949     emit_opcode(cbuf,0x2D);
3907 3950     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3908 3951         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3909 3952         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3910 3953 
3911 3954     // Load the converted int; adjust CPU stack
3912 3955     emit_opcode(cbuf,0x58);      // POP EAX
3913 3956 
3914 3957     emit_opcode(cbuf,0x5A);      // POP EDX
3915 3958 
              // Sentinel check: slow path only when EDX:EAX == 0x80000000:00000000
3916 3959     emit_opcode(cbuf,0x81);      // CMP EDX,imm
3917 3960     emit_d8    (cbuf,0xFA);      // rdx
3918 3961     emit_d32   (cbuf,0x80000000); // 0x80000000
3919 3962 
3920 3963     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3921 3964     emit_d8    (cbuf,0x13+4);    // Size of slow_call (reload sequence + TEST/JNE)
3922 3965 
3923 3966     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
3924 3967     emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
3925 3968 
3926 3969     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3927 3970     emit_d8    (cbuf,0x13);      // Size of slow_call
3928 3971 
3929 3972     // Push src onto stack slow-path
3930 3973     // Allocate a word
3931 3974     emit_opcode(cbuf,0x83);      // SUB ESP,8
3932 3975     emit_opcode(cbuf,0xEC);
3933 3976     emit_d8(cbuf,0x08);
3934 3977 
3935 3978     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
3936 3979     emit_opcode  (cbuf, 0x0F );
3937 3980     emit_opcode  (cbuf, 0x11 );
3938 3981     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3982 
3940 3983     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
3941 3984     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3985 
3943 3986     emit_opcode(cbuf,0x83);      // ADD ESP,8
3944 3987     emit_opcode(cbuf,0xC4);
3945 3988     emit_d8(cbuf,0x08);
3946 3989 
3947 3990     // CALL directly to the runtime
3948 3991     cbuf.set_inst_mark();
3949 3992     emit_opcode(cbuf,0xE8);      // Call into runtime
3950 3993     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3951 3994     // Carry on here...
3952 3995   %}
3953 3996
3954 3997 enc_class D2X_encoding( regX dst, regD src ) %{
3955 3998 // Allocate a word
3956 3999 emit_opcode(cbuf,0x83); // SUB ESP,4
3957 4000 emit_opcode(cbuf,0xEC);
3958 4001 emit_d8(cbuf,0x04);
3959 4002 int pop = 0x02;
3960 4003 if ($src$$reg != FPR1L_enc) {
3961 4004 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
3962 4005 emit_d8( cbuf, 0xC0-1+$src$$reg );
3963 4006 pop = 0x03;
3964 4007 }
3965 4008 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
3966 4009
3967 4010 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
3968 4011 emit_opcode (cbuf, 0x0F );
3969 4012 emit_opcode (cbuf, 0x10 );
3970 4013 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
3971 4014
3972 4015 emit_opcode(cbuf,0x83); // ADD ESP,4
3973 4016 emit_opcode(cbuf,0xC4);
3974 4017 emit_d8(cbuf,0x04);
3975 4018 // Carry on here...
3976 4019 %}
3977 4020
            // Finish an XMM float/double-to-int conversion.  The CVTTSS2SI /
            // CVTTSD2SI opcode bytes are assumed to be emitted by the
            // instruction's opcode fields before this runs; $primary selects
            // the double (0xF2/FLD_D, 8-byte temp) vs. float (0xF3/FLD_S,
            // 4-byte temp) variant of the slow path.  The sentinel 0x80000000
            // (produced by CVTT* on overflow/NaN) diverts to d2i_wrapper.
3978 4021   enc_class FX2I_encoding( regX src, eRegI dst ) %{
3979 4022     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); // ModRM for the preceding CVTT* opcode
3980 4023 
3981 4024     // Compare the result to see if we need to go to the slow path
3982 4025     emit_opcode(cbuf,0x81);       // CMP dst,imm
3983 4026     emit_rm    (cbuf,0x3,0x7,$dst$$reg);
3984 4027     emit_d32   (cbuf,0x80000000); //         0x80000000
3985 4028 
3986 4029     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3987 4030     emit_d8    (cbuf,0x13);       // Size of slow_call
3988 4031     // Store xmm to a temp memory
3989 4032     // location and push it onto stack.
3990 4033 
3991 4034     emit_opcode(cbuf,0x83);  // SUB ESP,4 (or 8 when $primary/double)
3992 4035     emit_opcode(cbuf,0xEC);
3993 4036     emit_d8(cbuf, $primary ? 0x8 : 0x4);
3994 4037 
3995 4038     emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
3996 4039     emit_opcode  (cbuf, 0x0F );
3997 4040     emit_opcode  (cbuf, 0x11 );
3998 4041     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3999 4042 
4000 4043     emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4001 4044     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4002 4045 
4003 4046     emit_opcode(cbuf,0x83);  // ADD ESP,4 (or 8 when $primary/double)
4004 4047     emit_opcode(cbuf,0xC4);
4005 4048     emit_d8(cbuf, $primary ? 0x8 : 0x4);
4006 4049 
4007 4050     // CALL directly to the runtime
4008 4051     cbuf.set_inst_mark();
4009 4052     emit_opcode(cbuf,0xE8);       // Call into runtime
4010 4053     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4011 4054 
4012 4055     // Carry on here...
4013 4056   %}
4014 4057
            // Move an XMM single (regX src) onto the x87 FP stack: spill with
            // MOVSS to a 4-byte stack temp, FLD_S it into FPR0, restore ESP.
            // NOTE(review): dst is implicit — the result lands on the FPU
            // stack top; $dst$$reg is not referenced here.
4015 4058   enc_class X2D_encoding( regD dst, regX src ) %{
4016 4059     // Allocate a word
4017 4060     emit_opcode(cbuf,0x83);        // SUB ESP,4
4018 4061     emit_opcode(cbuf,0xEC);
4019 4062     emit_d8(cbuf,0x04);
4020 4063 
4021 4064     emit_opcode  (cbuf, 0xF3 );    // MOVSS [ESP], xmm
4022 4065     emit_opcode  (cbuf, 0x0F );
4023 4066     emit_opcode  (cbuf, 0x11 );
4024 4067     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4025 4068 
4026 4069     emit_opcode(cbuf,0xD9 );       // FLD_S [ESP]
4027 4070     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4028 4071 
4029 4072     emit_opcode(cbuf,0x83);        // ADD ESP,4
4030 4073     emit_opcode(cbuf,0xC4);
4031 4074     emit_d8(cbuf,0x04);
4032 4075 
4033 4076     // Carry on here...
4034 4077   %}
4035 4078
            // XMM absolute value: AND the sign bit away using a mask loaded
            // from a static constant pool via an absolute [disp32] operand
            // (mod=00, r/m=101).
4036 4079   enc_class AbsXF_encoding(regX dst) %{
4037 4080     address signmask_address=(address)float_signmask_pool;
4038 4081     // ANDPS $dst,[signconst]  (0F 54)
4039 4082     emit_opcode(cbuf, 0x0F);
4040 4083     emit_opcode(cbuf, 0x54);
4041 4084     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4042 4085     emit_d32(cbuf, (int)signmask_address);
4043 4086   %}
4044 4087 
4045 4088   enc_class AbsXD_encoding(regXD dst) %{
4046 4089     address signmask_address=(address)double_signmask_pool;
4047 4090     // ANDPD $dst,[signconst]  (66 0F 54)
4048 4091     emit_opcode(cbuf, 0x66);
4049 4092     emit_opcode(cbuf, 0x0F);
4050 4093     emit_opcode(cbuf, 0x54);
4051 4094     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4052 4095     emit_d32(cbuf, (int)signmask_address);
4053 4096   %}
4054 4097 
            // XMM negation: XOR with a sign-flip mask from the constant pool.
4055 4098   enc_class NegXF_encoding(regX dst) %{
4056 4099     address signmask_address=(address)float_signflip_pool;
4057 4100     // XORPS $dst,[signconst]  (0F 57)
4058 4101     emit_opcode(cbuf, 0x0F);
4059 4102     emit_opcode(cbuf, 0x57);
4060 4103     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4061 4104     emit_d32(cbuf, (int)signmask_address);
4062 4105   %}
4063 4106 
4064 4107   enc_class NegXD_encoding(regXD dst) %{
4065 4108     address signmask_address=(address)double_signflip_pool;
4066 4109     // XORPD $dst,[signconst]  (66 0F 57)
4067 4110     emit_opcode(cbuf, 0x66);
4068 4111     emit_opcode(cbuf, 0x0F);
4069 4112     emit_opcode(cbuf, 0x57);
4070 4113     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4071 4114     emit_d32(cbuf, (int)signmask_address);
4072 4115   %}
4073 4116
            // x87 FP-stack arithmetic encodings.  All operate on ST (top of
            // stack) against another stack register; opcodes are the classic
            // two-byte D8/DE forms with the register index folded into the
            // second byte.
4074 4117   enc_class FMul_ST_reg( eRegF src1 ) %{
4075 4118     // Operand was loaded from memory into fp ST (stack top)
4076 4119     // FMUL   ST,$src  /* D8 C8+i */
4077 4120     emit_opcode(cbuf, 0xD8);
4078 4121     emit_opcode(cbuf, 0xC8 + $src1$$reg);
4079 4122   %}
4080 4123 
4081 4124   enc_class FAdd_ST_reg( eRegF src2 ) %{
4082 4125     // FADD   ST,src2  /* D8 C0+i */  (non-popping form)
4083 4126     emit_opcode(cbuf, 0xD8);
4084 4127     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4085 4128     //could use FADDP  src2,fpST  /* DE C0+i */
4086 4129   %}
4087 4130 
4088 4131   enc_class FAddP_reg_ST( eRegF src2 ) %{
4089 4132     // FADDP  src2,ST  /* DE C0+i */  (adds into src2 and pops ST)
4090 4133     emit_opcode(cbuf, 0xDE);
4091 4134     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4092 4135   %}
4093 4136 
            // Fused subtract-then-divide against the stack top.
4094 4137   enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4095 4138     // Operand has been loaded into fp ST (stack top)
4096 4139       // FSUB   ST,$src1
4097 4140       emit_opcode(cbuf, 0xD8);
4098 4141       emit_opcode(cbuf, 0xE0 + $src1$$reg);
4099 4142 
4100 4143       // FDIV
4101 4144       emit_opcode(cbuf, 0xD8);
4102 4145       emit_opcode(cbuf, 0xF0 + $src2$$reg);
4103 4146   %}
4104 4147 
            // Fused add-then-multiply: ST = (ST + src1) * src2.
4105 4148   enc_class MulFAddF (eRegF src1, eRegF src2) %{
4106 4149     // Operand was loaded from memory into fp ST (stack top)
4107 4150     // FADD   ST,$src  /* D8 C0+i */
4108 4151     emit_opcode(cbuf, 0xD8);
4109 4152     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4110 4153 
4111 4154     // FMUL  ST,src2  /* D8 C*+i */
4112 4155     emit_opcode(cbuf, 0xD8);
4113 4156     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4114 4157   %}
4115 4158 
4116 4159 
            // Add-then-multiply-into-src2 (popping): src2 = src2 * (ST + src1).
4117 4160   enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4118 4161     // Operand was loaded from memory into fp ST (stack top)
4119 4162     // FADD   ST,$src  /* D8 C0+i */
4120 4163     emit_opcode(cbuf, 0xD8);
4121 4164     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4122 4165 
4123 4166     // FMULP  src2,ST  /* DE C8+i */
4124 4167     emit_opcode(cbuf, 0xDE);
4125 4168     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4126 4169   %}
4127 4170
4128 4171   // Atomically load the volatile long
            // Uses the x87 FPU: FILD m64 reads the long in a single 64-bit
            // access, then FISTP stores it to the destination stack slot.
4129 4172   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4130 4173     emit_opcode(cbuf,0xDF);                    // FILD m64 (DF /5)
4131 4174     int rm_byte_opcode = 0x05;
4132 4175     int base     = $mem$$base;
4133 4176     int index    = $mem$$index;
4134 4177     int scale    = $mem$$scale;
4135 4178     int displace = $mem$$disp;
4136 4179     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4137 4180     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4138 4181     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // FISTP m64 (DF /7)
4139 4182   %}
4140 4183
            // Atomically load a volatile long via an XMM register:
            // one 64-bit SSE load from $mem into $tmp, then one 64-bit MOVSD
            // store into the destination stack slot.
4141 4184   enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4142 4185     { // Atomic long load
4143 4186       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4144 4187       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4145 4188       emit_opcode(cbuf,0x0F);
4146 4189       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4147 4190       int base     = $mem$$base;
4148 4191       int index    = $mem$$index;
4149 4192       int scale    = $mem$$scale;
4150 4193       int displace = $mem$$disp;
4151 4194       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4152 4195       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4153 4196     }
4154 4197     { // MOVSD $dst,$tmp ! atomic long store
4155 4198       emit_opcode(cbuf,0xF2);
4156 4199       emit_opcode(cbuf,0x0F);
4157 4200       emit_opcode(cbuf,0x11);
4158 4201       int base     = $dst$$base;
4159 4202       int index    = $dst$$index;
4160 4203       int scale    = $dst$$scale;
4161 4204       int displace = $dst$$disp;
4162 4205       bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4163 4206       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4164 4207     }
4165 4208   %}
4166 4209
            // Atomically load a volatile long into a GPR pair: one 64-bit SSE
            // load into $tmp, then split it — MOVD low half to $dst.lo, shift
            // the XMM right 32 bits, MOVD high half to $dst.hi.
4167 4210   enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4168 4211     { // Atomic long load
4169 4212       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4170 4213       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4171 4214       emit_opcode(cbuf,0x0F);
4172 4215       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4173 4216       int base     = $mem$$base;
4174 4217       int index    = $mem$$index;
4175 4218       int scale    = $mem$$scale;
4176 4219       int displace = $mem$$disp;
4177 4220       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4178 4221       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4179 4222     }
4180 4223     { // MOVD $dst.lo,$tmp
4181 4224       emit_opcode(cbuf,0x66);
4182 4225       emit_opcode(cbuf,0x0F);
4183 4226       emit_opcode(cbuf,0x7E);
4184 4227       emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4185 4228     }
4186 4229     { // PSRLQ $tmp,32
4187 4230       emit_opcode(cbuf,0x66);
4188 4231       emit_opcode(cbuf,0x0F);
4189 4232       emit_opcode(cbuf,0x73);
4190 4233       emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4191 4234       emit_d8(cbuf, 0x20);
4192 4235     }
4193 4236     { // MOVD $dst.hi,$tmp
4194 4237       emit_opcode(cbuf,0x66);
4195 4238       emit_opcode(cbuf,0x0F);
4196 4239       emit_opcode(cbuf,0x7E);
4197 4240       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4198 4241     }
4199 4242   %}
4200 4243
4201 4244   // Volatile Store Long.  Must be atomic, so move it into
4202 4245   // the FP TOS and then do a 64-bit FIST.  Has to probe the
4203 4246   // target address before the store (for null-ptr checks)
4204 4247   // so the memory operand is used twice in the encoding.
4205 4248   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4206 4249     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // FILD m64 from the stack slot
4207 4250     cbuf.set_inst_mark();            // Mark start of FIST in case $mem has an oop
4208 4251     emit_opcode(cbuf,0xDF);          // FISTP m64 (DF /7) — the single atomic 64-bit store
4209 4252     int rm_byte_opcode = 0x07;
4210 4253     int base     = $mem$$base;
4211 4254     int index    = $mem$$index;
4212 4255     int scale    = $mem$$scale;
4213 4256     int displace = $mem$$disp;
4214 4257     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4215 4258     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4216 4259   %}
4217 4260
            // Atomically store a volatile long from a stack slot via XMM:
            // 64-bit SSE load from the slot into $tmp, then one 64-bit MOVSD
            // store to $mem (the MOVSD is the faulting, atomic access).
4218 4261   enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4219 4262     { // Atomic long load
4220 4263       // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4221 4264       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4222 4265       emit_opcode(cbuf,0x0F);
4223 4266       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4224 4267       int base     = $src$$base;
4225 4268       int index    = $src$$index;
4226 4269       int scale    = $src$$scale;
4227 4270       int displace = $src$$disp;
4228 4271       bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4229 4272       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4230 4273     }
4231 4274     cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4232 4275     { // MOVSD $mem,$tmp ! atomic long store
4233 4276       emit_opcode(cbuf,0xF2);
4234 4277       emit_opcode(cbuf,0x0F);
4235 4278       emit_opcode(cbuf,0x11);
4236 4279       int base     = $mem$$base;
4237 4280       int index    = $mem$$index;
4238 4281       int scale    = $mem$$scale;
4239 4282       int displace = $mem$$disp;
4240 4283       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4241 4284       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4242 4285     }
4243 4286   %}
4244 4287
            // Atomically store a volatile long held in a GPR pair: assemble
            // the two 32-bit halves into $tmp (MOVD lo, MOVD hi, PUNPCKLDQ),
            // then perform a single 64-bit MOVSD store to $mem.
4245 4288   enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4246 4289     { // MOVD $tmp,$src.lo
4247 4290       emit_opcode(cbuf,0x66);
4248 4291       emit_opcode(cbuf,0x0F);
4249 4292       emit_opcode(cbuf,0x6E);
4250 4293       emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4251 4294     }
4252 4295     { // MOVD $tmp2,$src.hi
4253 4296       emit_opcode(cbuf,0x66);
4254 4297       emit_opcode(cbuf,0x0F);
4255 4298       emit_opcode(cbuf,0x6E);
4256 4299       emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4257 4300     }
4258 4301     { // PUNPCKLDQ $tmp,$tmp2
4259 4302       emit_opcode(cbuf,0x66);
4260 4303       emit_opcode(cbuf,0x0F);
4261 4304       emit_opcode(cbuf,0x62);
4262 4305       emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4263 4306     }
4264 4307     cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4265 4308     { // MOVSD $mem,$tmp ! atomic long store
4266 4309       emit_opcode(cbuf,0xF2);
4267 4310       emit_opcode(cbuf,0x0F);
4268 4311       emit_opcode(cbuf,0x11);
4269 4312       int base     = $mem$$base;
4270 4313       int index    = $mem$$index;
4271 4314       int scale    = $mem$$scale;
4272 4315       int displace = $mem$$disp;
4273 4316       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4317       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4318     }
4276 4319   %}
4277 4320
4278 4321   // Safepoint Poll.  This polls the safepoint page, and causes an
4279 4322   // exception if it is not readable. Unfortunately, it kills the condition code
4280 4323   // in the process
4281 4324   // We currently use TESTL [spp],EDI
4282 4325   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4283 4326 
4284 4327   enc_class Safepoint_Poll() %{
4285 4328     cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0);  // poll reloc so the VM can identify this site
4286 4329     emit_opcode(cbuf,0x85);                        // TEST r32,[disp32]
4287 4330     emit_rm   (cbuf, 0x0, 0x7, 0x5);               // reg=EDI, mod=00 r/m=101 = absolute disp32
4288 4331     emit_d32(cbuf, (intptr_t)os::get_polling_page());
4289 4332   %}
4290 4333 %}
4291 4334
4292 4335
4293 4336 //----------FRAME--------------------------------------------------------------
4294 4337 // Definition of frame structure and management information.
4295 4338 //
4296 4339 // S T A C K L A Y O U T Allocators stack-slot number
4297 4340 // | (to get allocators register number
4298 4341 // G Owned by | | v add OptoReg::stack0())
4299 4342 // r CALLER | |
4300 4343 // o | +--------+ pad to even-align allocators stack-slot
4301 4344 // w V | pad0 | numbers; owned by CALLER
4302 4345 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4303 4346 // h ^ | in | 5
4304 4347 // | | args | 4 Holes in incoming args owned by SELF
4305 4348 // | | | | 3
4306 4349 // | | +--------+
4307 4350 // V | | old out| Empty on Intel, window on Sparc
4308 4351 // | old |preserve| Must be even aligned.
4309 4352 // | SP-+--------+----> Matcher::_old_SP, even aligned
4310 4353 // | | in | 3 area for Intel ret address
4311 4354 // Owned by |preserve| Empty on Sparc.
4312 4355 // SELF +--------+
4313 4356 // | | pad2 | 2 pad to align old SP
4314 4357 // | +--------+ 1
4315 4358 // | | locks | 0
4316 4359 // | +--------+----> OptoReg::stack0(), even aligned
4317 4360 // | | pad1 | 11 pad to align new SP
4318 4361 // | +--------+
4319 4362 // | | | 10
4320 4363 // | | spills | 9 spills
4321 4364 // V | | 8 (pad0 slot for callee)
4322 4365 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4323 4366 // ^ | out | 7
4324 4367 // | | args | 6 Holes in outgoing args owned by CALLEE
4325 4368 // Owned by +--------+
4326 4369 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4327 4370 // | new |preserve| Must be even-aligned.
4328 4371 // | SP-+--------+----> Matcher::_new_SP, even aligned
4329 4372 // | | |
4330 4373 //
4331 4374 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4332 4375 // known from SELF's arguments and the Java calling convention.
4333 4376 // Region 6-7 is determined per call site.
4334 4377 // Note 2: If the calling convention leaves holes in the incoming argument
4335 4378 // area, those holes are owned by SELF. Holes in the outgoing area
4336 4379 //       are owned by the CALLEE.  Holes should not be necessary in the
4337 4380 // incoming area, as the Java calling convention is completely under
4338 4381 // the control of the AD file. Doubles can be sorted and packed to
4339 4382 //       avoid holes.  Holes in the outgoing arguments may be necessary for
4340 4383 // varargs C calling conventions.
4341 4384 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4342 4385 // even aligned with pad0 as needed.
4343 4386 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4344 4387 // region 6-11 is even aligned; it may be padded out more so that
4345 4388 // the region from SP to FP meets the minimum stack alignment.
4346 4389
4347 4390 frame %{
4348 4391 // What direction does stack grow in (assumed to be same for C & Java)
4349 4392 stack_direction(TOWARDS_LOW);
4350 4393
4351 4394 // These three registers define part of the calling convention
4352 4395 // between compiled code and the interpreter.
4353 4396 inline_cache_reg(EAX); // Inline Cache Register
4354 4397 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4355 4398
4356 4399 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4357 4400 cisc_spilling_operand_name(indOffset32);
4358 4401
4359 4402 // Number of stack slots consumed by locking an object
4360 4403 sync_stack_slots(1);
4361 4404
4362 4405 // Compiled code's Frame Pointer
4363 4406 frame_pointer(ESP);
4364 4407 // Interpreter stores its frame pointer in a register which is
4365 4408 // stored to the stack by I2CAdaptors.
4366 4409 // I2CAdaptors convert from interpreted java to compiled java.
4367 4410 interpreter_frame_pointer(EBP);
4368 4411
4369 4412 // Stack alignment requirement
4370 4413 // Alignment size in bytes (128-bit -> 16 bytes)
4371 4414 stack_alignment(StackAlignmentInBytes);
4372 4415
4373 4416 // Number of stack slots between incoming argument block and the start of
4374 4417 // a new frame. The PROLOG must add this many slots to the stack. The
4375 4418 // EPILOG must remove this many slots. Intel needs one slot for
4376 4419 // return address and one for rbp, (must save rbp)
4377 4420 in_preserve_stack_slots(2+VerifyStackAtCalls);
4378 4421
4379 4422 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4380 4423 // for calls to C. Supports the var-args backing area for register parms.
4381 4424 varargs_C_out_slots_killed(0);
4382 4425
4383 4426 // The after-PROLOG location of the return address. Location of
4384 4427 // return address specifies a type (REG or STACK) and a number
4385 4428 // representing the register number (i.e. - use a register name) or
4386 4429 // stack slot.
4387 4430 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4388 4431 // Otherwise, it is above the locks and verification slot and alignment word
4389 4432 return_addr(STACK - 1 +
4390 4433 round_to(1+VerifyStackAtCalls+
4391 4434 Compile::current()->fixed_slots(),
4392 4435 (StackAlignmentInBytes/wordSize)));
4393 4436
4394 4437 // Body of function which returns an integer array locating
4395 4438 // arguments either in registers or in stack slots. Passed an array
4396 4439 // of ideal registers called "sig" and a "length" count. Stack-slot
4397 4440 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4398 4441 // arguments for a CALLEE. Incoming stack arguments are
4399 4442 // automatically biased by the preserve_stack_slots field above.
4400 4443 calling_convention %{
4401 4444 // No difference between ingoing/outgoing just pass false
4402 4445 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4403 4446 %}
4404 4447
4405 4448
4406 4449 // Body of function which returns an integer array locating
4407 4450 // arguments either in registers or in stack slots. Passed an array
4408 4451 // of ideal registers called "sig" and a "length" count. Stack-slot
4409 4452 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4410 4453 // arguments for a CALLEE. Incoming stack arguments are
4411 4454 // automatically biased by the preserve_stack_slots field above.
4412 4455 c_calling_convention %{
4413 4456 // This is obviously always outgoing
4414 4457 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4415 4458 %}
4416 4459
4417 4460 // Location of C & interpreter return values
4418 4461 c_return_value %{
4419 4462 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4420 4463 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4421 4464 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4422 4465
4423 4466 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4424 4467 // that C functions return float and double results in XMM0.
4425 4468 if( ideal_reg == Op_RegD && UseSSE>=2 )
4426 4469 return OptoRegPair(XMM0b_num,XMM0a_num);
4427 4470 if( ideal_reg == Op_RegF && UseSSE>=2 )
4428 4471 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4429 4472
4430 4473 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4431 4474 %}
4432 4475
4433 4476 // Location of return values
4434 4477 return_value %{
4435 4478 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4436 4479 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4437 4480 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4438 4481 if( ideal_reg == Op_RegD && UseSSE>=2 )
4439 4482 return OptoRegPair(XMM0b_num,XMM0a_num);
4440 4483 if( ideal_reg == Op_RegF && UseSSE>=1 )
4441 4484 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4442 4485 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4443 4486 %}
4444 4487
4445 4488 %}
4446 4489
4447 4490 //----------ATTRIBUTES---------------------------------------------------------
4448 4491 //----------Operand Attributes-------------------------------------------------
4449 4492 op_attrib op_cost(0); // Required cost attribute
4450 4493
4451 4494 //----------Instruction Attributes---------------------------------------------
4452 4495 ins_attrib ins_cost(100); // Required cost attribute
4453 4496 ins_attrib ins_size(8); // Required size attribute (in bits)
4454 4497 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4455 4498 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4456 4499 // non-matching short branch variant of some
4457 4500 // long branch?
4458 4501 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4459 4502 // specifies the alignment that some part of the instruction (not
4460 4503 // necessarily the start) requires. If > 1, a compute_padding()
4461 4504 // function must be provided for the instruction
4462 4505
4463 4506 //----------OPERANDS-----------------------------------------------------------
4464 4507 // Operand definitions must precede instruction definitions for correct parsing
4465 4508 // in the ADLC because operands constitute user defined types which are used in
4466 4509 // instruction definitions.
4467 4510
4468 4511 //----------Simple Operands----------------------------------------------------
4469 4512 // Immediate Operands
4470 4513 // Integer Immediate
4471 4514 operand immI() %{
4472 4515 match(ConI);
4473 4516
4474 4517 op_cost(10);
4475 4518 format %{ %}
4476 4519 interface(CONST_INTER);
4477 4520 %}
4478 4521
4479 4522 // Constant for test vs zero
4480 4523 operand immI0() %{
4481 4524 predicate(n->get_int() == 0);
4482 4525 match(ConI);
4483 4526
4484 4527 op_cost(0);
4485 4528 format %{ %}
4486 4529 interface(CONST_INTER);
4487 4530 %}
4488 4531
4489 4532 // Constant for increment
4490 4533 operand immI1() %{
4491 4534 predicate(n->get_int() == 1);
4492 4535 match(ConI);
4493 4536
4494 4537 op_cost(0);
4495 4538 format %{ %}
4496 4539 interface(CONST_INTER);
4497 4540 %}
4498 4541
4499 4542 // Constant for decrement
4500 4543 operand immI_M1() %{
4501 4544 predicate(n->get_int() == -1);
4502 4545 match(ConI);
4503 4546
4504 4547 op_cost(0);
4505 4548 format %{ %}
4506 4549 interface(CONST_INTER);
4507 4550 %}
4508 4551
4509 4552 // Valid scale values for addressing modes
4510 4553 operand immI2() %{
4511 4554 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4512 4555 match(ConI);
4513 4556
4514 4557 format %{ %}
4515 4558 interface(CONST_INTER);
4516 4559 %}
4517 4560
4518 4561 operand immI8() %{
4519 4562 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4520 4563 match(ConI);
4521 4564
4522 4565 op_cost(5);
4523 4566 format %{ %}
4524 4567 interface(CONST_INTER);
4525 4568 %}
4526 4569
4527 4570 operand immI16() %{
4528 4571 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4529 4572 match(ConI);
4530 4573
4531 4574 op_cost(10);
4532 4575 format %{ %}
4533 4576 interface(CONST_INTER);
4534 4577 %}
4535 4578
4536 4579 // Constant for long shifts
4537 4580 operand immI_32() %{
4538 4581 predicate( n->get_int() == 32 );
4539 4582 match(ConI);
4540 4583
4541 4584 op_cost(0);
4542 4585 format %{ %}
4543 4586 interface(CONST_INTER);
4544 4587 %}
4545 4588
4546 4589 operand immI_1_31() %{
4547 4590 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4548 4591 match(ConI);
4549 4592
4550 4593 op_cost(0);
4551 4594 format %{ %}
4552 4595 interface(CONST_INTER);
4553 4596 %}
4554 4597
4555 4598 operand immI_32_63() %{
4556 4599 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4557 4600 match(ConI);
4558 4601 op_cost(0);
4559 4602
4560 4603 format %{ %}
4561 4604 interface(CONST_INTER);
4562 4605 %}
4563 4606
4564 4607 operand immI_1() %{
4565 4608 predicate( n->get_int() == 1 );
4566 4609 match(ConI);
4567 4610
4568 4611 op_cost(0);
4569 4612 format %{ %}
4570 4613 interface(CONST_INTER);
4571 4614 %}
4572 4615
4573 4616 operand immI_2() %{
4574 4617 predicate( n->get_int() == 2 );
4575 4618 match(ConI);
4576 4619
4577 4620 op_cost(0);
4578 4621 format %{ %}
4579 4622 interface(CONST_INTER);
4580 4623 %}
4581 4624
4582 4625 operand immI_3() %{
4583 4626 predicate( n->get_int() == 3 );
4584 4627 match(ConI);
4585 4628
4586 4629 op_cost(0);
4587 4630 format %{ %}
4588 4631 interface(CONST_INTER);
4589 4632 %}
4590 4633
4591 4634 // Pointer Immediate
4592 4635 operand immP() %{
4593 4636 match(ConP);
4594 4637
4595 4638 op_cost(10);
4596 4639 format %{ %}
4597 4640 interface(CONST_INTER);
4598 4641 %}
4599 4642
4600 4643 // NULL Pointer Immediate
4601 4644 operand immP0() %{
4602 4645 predicate( n->get_ptr() == 0 );
4603 4646 match(ConP);
4604 4647 op_cost(0);
4605 4648
4606 4649 format %{ %}
4607 4650 interface(CONST_INTER);
4608 4651 %}
4609 4652
4610 4653 // Long Immediate
4611 4654 operand immL() %{
4612 4655 match(ConL);
4613 4656
4614 4657 op_cost(20);
4615 4658 format %{ %}
4616 4659 interface(CONST_INTER);
4617 4660 %}
4618 4661
4619 4662 // Long Immediate zero
4620 4663 operand immL0() %{
4621 4664 predicate( n->get_long() == 0L );
4622 4665 match(ConL);
4623 4666 op_cost(0);
4624 4667
4625 4668 format %{ %}
4626 4669 interface(CONST_INTER);
4627 4670 %}
4628 4671
4629 4672 // Long Immediate zero
4630 4673 operand immL_M1() %{
4631 4674 predicate( n->get_long() == -1L );
4632 4675 match(ConL);
4633 4676 op_cost(0);
4634 4677
4635 4678 format %{ %}
4636 4679 interface(CONST_INTER);
4637 4680 %}
4638 4681
4639 4682 // Long immediate from 0 to 127.
4640 4683 // Used for a shorter form of long mul by 10.
4641 4684 operand immL_127() %{
4642 4685 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4643 4686 match(ConL);
4644 4687 op_cost(0);
4645 4688
4646 4689 format %{ %}
4647 4690 interface(CONST_INTER);
4648 4691 %}
4649 4692
4650 4693 // Long Immediate: low 32-bit mask
4651 4694 operand immL_32bits() %{
4652 4695 predicate(n->get_long() == 0xFFFFFFFFL);
4653 4696 match(ConL);
4654 4697 op_cost(0);
4655 4698
4656 4699 format %{ %}
4657 4700 interface(CONST_INTER);
4658 4701 %}
4659 4702
4660 4703 // Long Immediate: low 32-bit mask
4661 4704 operand immL32() %{
4662 4705 predicate(n->get_long() == (int)(n->get_long()));
4663 4706 match(ConL);
4664 4707 op_cost(20);
4665 4708
4666 4709 format %{ %}
4667 4710 interface(CONST_INTER);
4668 4711 %}
4669 4712
4670 4713 //Double Immediate zero
4671 4714 operand immD0() %{
4672 4715 // Do additional (and counter-intuitive) test against NaN to work around VC++
4673 4716 // bug that generates code such that NaNs compare equal to 0.0
4674 4717 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4675 4718 match(ConD);
4676 4719
4677 4720 op_cost(5);
4678 4721 format %{ %}
4679 4722 interface(CONST_INTER);
4680 4723 %}
4681 4724
4682 4725 // Double Immediate
4683 4726 operand immD1() %{
4684 4727 predicate( UseSSE<=1 && n->getd() == 1.0 );
4685 4728 match(ConD);
4686 4729
4687 4730 op_cost(5);
4688 4731 format %{ %}
4689 4732 interface(CONST_INTER);
4690 4733 %}
4691 4734
4692 4735 // Double Immediate
4693 4736 operand immD() %{
4694 4737 predicate(UseSSE<=1);
4695 4738 match(ConD);
4696 4739
4697 4740 op_cost(5);
4698 4741 format %{ %}
4699 4742 interface(CONST_INTER);
4700 4743 %}
4701 4744
4702 4745 operand immXD() %{
4703 4746 predicate(UseSSE>=2);
4704 4747 match(ConD);
4705 4748
4706 4749 op_cost(5);
4707 4750 format %{ %}
4708 4751 interface(CONST_INTER);
4709 4752 %}
4710 4753
4711 4754 // Double Immediate zero
4712 4755 operand immXD0() %{
4713 4756 // Do additional (and counter-intuitive) test against NaN to work around VC++
4714 4757 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4715 4758 // compare equal to -0.0.
4716 4759 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4717 4760 match(ConD);
4718 4761
4719 4762 format %{ %}
4720 4763 interface(CONST_INTER);
4721 4764 %}
4722 4765
4723 4766 // Float Immediate zero
4724 4767 operand immF0() %{
4725 4768 predicate( UseSSE == 0 && n->getf() == 0.0 );
4726 4769 match(ConF);
4727 4770
4728 4771 op_cost(5);
4729 4772 format %{ %}
4730 4773 interface(CONST_INTER);
4731 4774 %}
4732 4775
4733 4776 // Float Immediate
4734 4777 operand immF() %{
4735 4778 predicate( UseSSE == 0 );
4736 4779 match(ConF);
4737 4780
4738 4781 op_cost(5);
4739 4782 format %{ %}
4740 4783 interface(CONST_INTER);
4741 4784 %}
4742 4785
4743 4786 // Float Immediate
4744 4787 operand immXF() %{
4745 4788 predicate(UseSSE >= 1);
4746 4789 match(ConF);
4747 4790
4748 4791 op_cost(5);
4749 4792 format %{ %}
4750 4793 interface(CONST_INTER);
4751 4794 %}
4752 4795
4753 4796 // Float Immediate zero. Zero and not -0.0
4754 4797 operand immXF0() %{
4755 4798 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4756 4799 match(ConF);
4757 4800
4758 4801 op_cost(5);
4759 4802 format %{ %}
4760 4803 interface(CONST_INTER);
4761 4804 %}
4762 4805
4763 4806 // Immediates for special shifts (sign extend)
4764 4807
4765 4808 // Constants for increment
4766 4809 operand immI_16() %{
4767 4810 predicate( n->get_int() == 16 );
4768 4811 match(ConI);
4769 4812
4770 4813 format %{ %}
4771 4814 interface(CONST_INTER);
4772 4815 %}
4773 4816
4774 4817 operand immI_24() %{
4775 4818 predicate( n->get_int() == 24 );
4776 4819 match(ConI);
4777 4820
4778 4821 format %{ %}
4779 4822 interface(CONST_INTER);
4780 4823 %}
4781 4824
4782 4825 // Constant for byte-wide masking
4783 4826 operand immI_255() %{
4784 4827 predicate( n->get_int() == 255 );
4785 4828 match(ConI);
4786 4829
4787 4830 format %{ %}
4788 4831 interface(CONST_INTER);
4789 4832 %}
4790 4833
4791 4834 // Constant for short-wide masking
4792 4835 operand immI_65535() %{
4793 4836 predicate(n->get_int() == 65535);
4794 4837 match(ConI);
4795 4838
4796 4839 format %{ %}
4797 4840 interface(CONST_INTER);
4798 4841 %}
4799 4842
4800 4843 // Register Operands
4801 4844 // Integer Register
4802 4845 operand eRegI() %{
4803 4846 constraint(ALLOC_IN_RC(e_reg));
4804 4847 match(RegI);
4805 4848 match(xRegI);
4806 4849 match(eAXRegI);
4807 4850 match(eBXRegI);
4808 4851 match(eCXRegI);
4809 4852 match(eDXRegI);
4810 4853 match(eDIRegI);
4811 4854 match(eSIRegI);
4812 4855
4813 4856 format %{ %}
4814 4857 interface(REG_INTER);
4815 4858 %}
4816 4859
4817 4860 // Subset of Integer Register
4818 4861 operand xRegI(eRegI reg) %{
4819 4862 constraint(ALLOC_IN_RC(x_reg));
4820 4863 match(reg);
4821 4864 match(eAXRegI);
4822 4865 match(eBXRegI);
4823 4866 match(eCXRegI);
4824 4867 match(eDXRegI);
4825 4868
4826 4869 format %{ %}
4827 4870 interface(REG_INTER);
4828 4871 %}
4829 4872
4830 4873 // Special Registers
4831 4874 operand eAXRegI(xRegI reg) %{
4832 4875 constraint(ALLOC_IN_RC(eax_reg));
4833 4876 match(reg);
4834 4877 match(eRegI);
4835 4878
4836 4879 format %{ "EAX" %}
4837 4880 interface(REG_INTER);
4838 4881 %}
4839 4882
4840 4883 // Special Registers
4841 4884 operand eBXRegI(xRegI reg) %{
4842 4885 constraint(ALLOC_IN_RC(ebx_reg));
4843 4886 match(reg);
4844 4887 match(eRegI);
4845 4888
4846 4889 format %{ "EBX" %}
4847 4890 interface(REG_INTER);
4848 4891 %}
4849 4892
4850 4893 operand eCXRegI(xRegI reg) %{
4851 4894 constraint(ALLOC_IN_RC(ecx_reg));
4852 4895 match(reg);
4853 4896 match(eRegI);
4854 4897
4855 4898 format %{ "ECX" %}
4856 4899 interface(REG_INTER);
4857 4900 %}
4858 4901
4859 4902 operand eDXRegI(xRegI reg) %{
4860 4903 constraint(ALLOC_IN_RC(edx_reg));
4861 4904 match(reg);
4862 4905 match(eRegI);
4863 4906
4864 4907 format %{ "EDX" %}
4865 4908 interface(REG_INTER);
4866 4909 %}
4867 4910
4868 4911 operand eDIRegI(xRegI reg) %{
4869 4912 constraint(ALLOC_IN_RC(edi_reg));
4870 4913 match(reg);
4871 4914 match(eRegI);
4872 4915
4873 4916 format %{ "EDI" %}
4874 4917 interface(REG_INTER);
4875 4918 %}
4876 4919
4877 4920 operand naxRegI() %{
4878 4921 constraint(ALLOC_IN_RC(nax_reg));
4879 4922 match(RegI);
4880 4923 match(eCXRegI);
4881 4924 match(eDXRegI);
4882 4925 match(eSIRegI);
4883 4926 match(eDIRegI);
4884 4927
4885 4928 format %{ %}
4886 4929 interface(REG_INTER);
4887 4930 %}
4888 4931
4889 4932 operand nadxRegI() %{
4890 4933 constraint(ALLOC_IN_RC(nadx_reg));
4891 4934 match(RegI);
4892 4935 match(eBXRegI);
4893 4936 match(eCXRegI);
4894 4937 match(eSIRegI);
4895 4938 match(eDIRegI);
4896 4939
4897 4940 format %{ %}
4898 4941 interface(REG_INTER);
4899 4942 %}
4900 4943
4901 4944 operand ncxRegI() %{
4902 4945 constraint(ALLOC_IN_RC(ncx_reg));
4903 4946 match(RegI);
4904 4947 match(eAXRegI);
4905 4948 match(eDXRegI);
4906 4949 match(eSIRegI);
4907 4950 match(eDIRegI);
4908 4951
4909 4952 format %{ %}
4910 4953 interface(REG_INTER);
4911 4954 %}
4912 4955
4913 4956 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
4914 4957 // //
4915 4958 operand eSIRegI(xRegI reg) %{
4916 4959 constraint(ALLOC_IN_RC(esi_reg));
4917 4960 match(reg);
4918 4961 match(eRegI);
4919 4962
4920 4963 format %{ "ESI" %}
4921 4964 interface(REG_INTER);
4922 4965 %}
4923 4966
4924 4967 // Pointer Register
4925 4968 operand anyRegP() %{
4926 4969 constraint(ALLOC_IN_RC(any_reg));
4927 4970 match(RegP);
4928 4971 match(eAXRegP);
4929 4972 match(eBXRegP);
4930 4973 match(eCXRegP);
4931 4974 match(eDIRegP);
4932 4975 match(eRegP);
4933 4976
4934 4977 format %{ %}
4935 4978 interface(REG_INTER);
4936 4979 %}
4937 4980
4938 4981 operand eRegP() %{
4939 4982 constraint(ALLOC_IN_RC(e_reg));
4940 4983 match(RegP);
4941 4984 match(eAXRegP);
4942 4985 match(eBXRegP);
4943 4986 match(eCXRegP);
4944 4987 match(eDIRegP);
4945 4988
4946 4989 format %{ %}
4947 4990 interface(REG_INTER);
4948 4991 %}
4949 4992
4950 4993 // On windows95, EBP is not safe to use for implicit null tests.
4951 4994 operand eRegP_no_EBP() %{
4952 4995 constraint(ALLOC_IN_RC(e_reg_no_rbp));
4953 4996 match(RegP);
4954 4997 match(eAXRegP);
4955 4998 match(eBXRegP);
4956 4999 match(eCXRegP);
4957 5000 match(eDIRegP);
4958 5001
4959 5002 op_cost(100);
4960 5003 format %{ %}
4961 5004 interface(REG_INTER);
4962 5005 %}
4963 5006
4964 5007 operand naxRegP() %{
4965 5008 constraint(ALLOC_IN_RC(nax_reg));
4966 5009 match(RegP);
4967 5010 match(eBXRegP);
4968 5011 match(eDXRegP);
4969 5012 match(eCXRegP);
4970 5013 match(eSIRegP);
4971 5014 match(eDIRegP);
4972 5015
4973 5016 format %{ %}
4974 5017 interface(REG_INTER);
4975 5018 %}
4976 5019
4977 5020 operand nabxRegP() %{
4978 5021 constraint(ALLOC_IN_RC(nabx_reg));
4979 5022 match(RegP);
4980 5023 match(eCXRegP);
4981 5024 match(eDXRegP);
4982 5025 match(eSIRegP);
4983 5026 match(eDIRegP);
4984 5027
4985 5028 format %{ %}
4986 5029 interface(REG_INTER);
4987 5030 %}
4988 5031
4989 5032 operand pRegP() %{
4990 5033 constraint(ALLOC_IN_RC(p_reg));
4991 5034 match(RegP);
4992 5035 match(eBXRegP);
4993 5036 match(eDXRegP);
4994 5037 match(eSIRegP);
4995 5038 match(eDIRegP);
4996 5039
4997 5040 format %{ %}
4998 5041 interface(REG_INTER);
4999 5042 %}
5000 5043
5001 5044 // Special Registers
5002 5045 // Return a pointer value
5003 5046 operand eAXRegP(eRegP reg) %{
5004 5047 constraint(ALLOC_IN_RC(eax_reg));
5005 5048 match(reg);
5006 5049 format %{ "EAX" %}
5007 5050 interface(REG_INTER);
5008 5051 %}
5009 5052
5010 5053 // Used in AtomicAdd
5011 5054 operand eBXRegP(eRegP reg) %{
5012 5055 constraint(ALLOC_IN_RC(ebx_reg));
5013 5056 match(reg);
5014 5057 format %{ "EBX" %}
5015 5058 interface(REG_INTER);
5016 5059 %}
5017 5060
5018 5061 // Tail-call (interprocedural jump) to interpreter
5019 5062 operand eCXRegP(eRegP reg) %{
5020 5063 constraint(ALLOC_IN_RC(ecx_reg));
5021 5064 match(reg);
5022 5065 format %{ "ECX" %}
5023 5066 interface(REG_INTER);
5024 5067 %}
5025 5068
5026 5069 operand eSIRegP(eRegP reg) %{
5027 5070 constraint(ALLOC_IN_RC(esi_reg));
5028 5071 match(reg);
5029 5072 format %{ "ESI" %}
5030 5073 interface(REG_INTER);
5031 5074 %}
5032 5075
5033 5076 // Used in rep stosw
5034 5077 operand eDIRegP(eRegP reg) %{
5035 5078 constraint(ALLOC_IN_RC(edi_reg));
5036 5079 match(reg);
5037 5080 format %{ "EDI" %}
5038 5081 interface(REG_INTER);
5039 5082 %}
5040 5083
5041 5084 operand eBPRegP() %{
5042 5085 constraint(ALLOC_IN_RC(ebp_reg));
5043 5086 match(RegP);
5044 5087 format %{ "EBP" %}
5045 5088 interface(REG_INTER);
5046 5089 %}
5047 5090
5048 5091 operand eRegL() %{
5049 5092 constraint(ALLOC_IN_RC(long_reg));
5050 5093 match(RegL);
5051 5094 match(eADXRegL);
5052 5095
5053 5096 format %{ %}
5054 5097 interface(REG_INTER);
5055 5098 %}
5056 5099
5057 5100 operand eADXRegL( eRegL reg ) %{
5058 5101 constraint(ALLOC_IN_RC(eadx_reg));
5059 5102 match(reg);
5060 5103
5061 5104 format %{ "EDX:EAX" %}
5062 5105 interface(REG_INTER);
5063 5106 %}
5064 5107
5065 5108 operand eBCXRegL( eRegL reg ) %{
5066 5109 constraint(ALLOC_IN_RC(ebcx_reg));
5067 5110 match(reg);
5068 5111
5069 5112 format %{ "EBX:ECX" %}
5070 5113 interface(REG_INTER);
5071 5114 %}
5072 5115
5073 5116 // Special case for integer high multiply
5074 5117 operand eADXRegL_low_only() %{
5075 5118 constraint(ALLOC_IN_RC(eadx_reg));
5076 5119 match(RegL);
5077 5120
5078 5121 format %{ "EAX" %}
5079 5122 interface(REG_INTER);
5080 5123 %}
5081 5124
5082 5125 // Flags register, used as output of compare instructions
5083 5126 operand eFlagsReg() %{
5084 5127 constraint(ALLOC_IN_RC(int_flags));
5085 5128 match(RegFlags);
5086 5129
5087 5130 format %{ "EFLAGS" %}
5088 5131 interface(REG_INTER);
5089 5132 %}
5090 5133
5091 5134 // Flags register, used as output of FLOATING POINT compare instructions
5092 5135 operand eFlagsRegU() %{
5093 5136 constraint(ALLOC_IN_RC(int_flags));
5094 5137 match(RegFlags);
5095 5138
5096 5139 format %{ "EFLAGS_U" %}
5097 5140 interface(REG_INTER);
5098 5141 %}
5099 5142
5100 5143 operand eFlagsRegUCF() %{
5101 5144 constraint(ALLOC_IN_RC(int_flags));
5102 5145 match(RegFlags);
5103 5146 predicate(false);
5104 5147
5105 5148 format %{ "EFLAGS_U_CF" %}
5106 5149 interface(REG_INTER);
5107 5150 %}
5108 5151
5109 5152 // Condition Code Register used by long compare
5110 5153 operand flagsReg_long_LTGE() %{
5111 5154 constraint(ALLOC_IN_RC(int_flags));
5112 5155 match(RegFlags);
5113 5156 format %{ "FLAGS_LTGE" %}
5114 5157 interface(REG_INTER);
5115 5158 %}
5116 5159 operand flagsReg_long_EQNE() %{
5117 5160 constraint(ALLOC_IN_RC(int_flags));
5118 5161 match(RegFlags);
5119 5162 format %{ "FLAGS_EQNE" %}
5120 5163 interface(REG_INTER);
5121 5164 %}
5122 5165 operand flagsReg_long_LEGT() %{
5123 5166 constraint(ALLOC_IN_RC(int_flags));
5124 5167 match(RegFlags);
5125 5168 format %{ "FLAGS_LEGT" %}
5126 5169 interface(REG_INTER);
5127 5170 %}
5128 5171
5129 5172 // Float register operands
5130 5173 operand regD() %{
5131 5174 predicate( UseSSE < 2 );
5132 5175 constraint(ALLOC_IN_RC(dbl_reg));
5133 5176 match(RegD);
5134 5177 match(regDPR1);
5135 5178 match(regDPR2);
5136 5179 format %{ %}
5137 5180 interface(REG_INTER);
5138 5181 %}
5139 5182
5140 5183 operand regDPR1(regD reg) %{
5141 5184 predicate( UseSSE < 2 );
5142 5185 constraint(ALLOC_IN_RC(dbl_reg0));
5143 5186 match(reg);
5144 5187 format %{ "FPR1" %}
5145 5188 interface(REG_INTER);
5146 5189 %}
5147 5190
5148 5191 operand regDPR2(regD reg) %{
5149 5192 predicate( UseSSE < 2 );
5150 5193 constraint(ALLOC_IN_RC(dbl_reg1));
5151 5194 match(reg);
5152 5195 format %{ "FPR2" %}
5153 5196 interface(REG_INTER);
5154 5197 %}
5155 5198
5156 5199 operand regnotDPR1(regD reg) %{
5157 5200 predicate( UseSSE < 2 );
5158 5201 constraint(ALLOC_IN_RC(dbl_notreg0));
5159 5202 match(reg);
5160 5203 format %{ %}
5161 5204 interface(REG_INTER);
5162 5205 %}
5163 5206
5164 5207 // XMM Double register operands
5165 5208 operand regXD() %{
5166 5209 predicate( UseSSE>=2 );
5167 5210 constraint(ALLOC_IN_RC(xdb_reg));
5168 5211 match(RegD);
5169 5212 match(regXD6);
5170 5213 match(regXD7);
5171 5214 format %{ %}
5172 5215 interface(REG_INTER);
5173 5216 %}
5174 5217
5175 5218 // XMM6 double register operands
5176 5219 operand regXD6(regXD reg) %{
5177 5220 predicate( UseSSE>=2 );
5178 5221 constraint(ALLOC_IN_RC(xdb_reg6));
5179 5222 match(reg);
5180 5223 format %{ "XMM6" %}
5181 5224 interface(REG_INTER);
5182 5225 %}
5183 5226
5184 5227 // XMM7 double register operands
5185 5228 operand regXD7(regXD reg) %{
5186 5229 predicate( UseSSE>=2 );
5187 5230 constraint(ALLOC_IN_RC(xdb_reg7));
5188 5231 match(reg);
5189 5232 format %{ "XMM7" %}
5190 5233 interface(REG_INTER);
5191 5234 %}
5192 5235
5193 5236 // Float register operands
5194 5237 operand regF() %{
5195 5238 predicate( UseSSE < 2 );
5196 5239 constraint(ALLOC_IN_RC(flt_reg));
5197 5240 match(RegF);
5198 5241 match(regFPR1);
5199 5242 format %{ %}
5200 5243 interface(REG_INTER);
5201 5244 %}
5202 5245
5203 5246 // Float register operands
5204 5247 operand regFPR1(regF reg) %{
5205 5248 predicate( UseSSE < 2 );
5206 5249 constraint(ALLOC_IN_RC(flt_reg0));
5207 5250 match(reg);
5208 5251 format %{ "FPR1" %}
5209 5252 interface(REG_INTER);
5210 5253 %}
5211 5254
5212 5255 // XMM register operands
5213 5256 operand regX() %{
5214 5257 predicate( UseSSE>=1 );
5215 5258 constraint(ALLOC_IN_RC(xmm_reg));
5216 5259 match(RegF);
5217 5260 format %{ %}
5218 5261 interface(REG_INTER);
5219 5262 %}
5220 5263
5221 5264
5222 5265 //----------Memory Operands----------------------------------------------------
5223 5266 // Direct Memory Operand
5224 5267 operand direct(immP addr) %{
5225 5268 match(addr);
5226 5269
5227 5270 format %{ "[$addr]" %}
5228 5271 interface(MEMORY_INTER) %{
5229 5272 base(0xFFFFFFFF);
5230 5273 index(0x4);
5231 5274 scale(0x0);
5232 5275 disp($addr);
5233 5276 %}
5234 5277 %}
5235 5278
5236 5279 // Indirect Memory Operand
5237 5280 operand indirect(eRegP reg) %{
5238 5281 constraint(ALLOC_IN_RC(e_reg));
5239 5282 match(reg);
5240 5283
5241 5284 format %{ "[$reg]" %}
5242 5285 interface(MEMORY_INTER) %{
5243 5286 base($reg);
5244 5287 index(0x4);
5245 5288 scale(0x0);
5246 5289 disp(0x0);
5247 5290 %}
5248 5291 %}
5249 5292
5250 5293 // Indirect Memory Plus Short Offset Operand
5251 5294 operand indOffset8(eRegP reg, immI8 off) %{
5252 5295 match(AddP reg off);
5253 5296
5254 5297 format %{ "[$reg + $off]" %}
5255 5298 interface(MEMORY_INTER) %{
5256 5299 base($reg);
5257 5300 index(0x4);
5258 5301 scale(0x0);
5259 5302 disp($off);
5260 5303 %}
5261 5304 %}
5262 5305
5263 5306 // Indirect Memory Plus Long Offset Operand
5264 5307 operand indOffset32(eRegP reg, immI off) %{
5265 5308 match(AddP reg off);
5266 5309
5267 5310 format %{ "[$reg + $off]" %}
5268 5311 interface(MEMORY_INTER) %{
5269 5312 base($reg);
5270 5313 index(0x4);
5271 5314 scale(0x0);
5272 5315 disp($off);
5273 5316 %}
5274 5317 %}
5275 5318
5276 5319 // Indirect Memory Plus Long Offset Operand
5277 5320 operand indOffset32X(eRegI reg, immP off) %{
5278 5321 match(AddP off reg);
5279 5322
5280 5323 format %{ "[$reg + $off]" %}
5281 5324 interface(MEMORY_INTER) %{
5282 5325 base($reg);
5283 5326 index(0x4);
5284 5327 scale(0x0);
5285 5328 disp($off);
5286 5329 %}
5287 5330 %}
5288 5331
5289 5332 // Indirect Memory Plus Index Register Plus Offset Operand
5290 5333 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5291 5334 match(AddP (AddP reg ireg) off);
5292 5335
5293 5336 op_cost(10);
5294 5337 format %{"[$reg + $off + $ireg]" %}
5295 5338 interface(MEMORY_INTER) %{
5296 5339 base($reg);
5297 5340 index($ireg);
5298 5341 scale(0x0);
5299 5342 disp($off);
5300 5343 %}
5301 5344 %}
5302 5345
5303 5346 // Indirect Memory Plus Index Register Plus Offset Operand
5304 5347 operand indIndex(eRegP reg, eRegI ireg) %{
5305 5348 match(AddP reg ireg);
5306 5349
5307 5350 op_cost(10);
5308 5351 format %{"[$reg + $ireg]" %}
5309 5352 interface(MEMORY_INTER) %{
5310 5353 base($reg);
5311 5354 index($ireg);
5312 5355 scale(0x0);
5313 5356 disp(0x0);
5314 5357 %}
5315 5358 %}
5316 5359
5317 5360 // // -------------------------------------------------------------------------
5318 5361 // // 486 architecture doesn't support "scale * index + offset" with out a base
5319 5362 // // -------------------------------------------------------------------------
5320 5363 // // Scaled Memory Operands
5321 5364 // // Indirect Memory Times Scale Plus Offset Operand
5322 5365 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5323 5366 // match(AddP off (LShiftI ireg scale));
5324 5367 //
5325 5368 // op_cost(10);
5326 5369 // format %{"[$off + $ireg << $scale]" %}
5327 5370 // interface(MEMORY_INTER) %{
5328 5371 // base(0x4);
5329 5372 // index($ireg);
5330 5373 // scale($scale);
5331 5374 // disp($off);
5332 5375 // %}
5333 5376 // %}
5334 5377
5335 5378 // Indirect Memory Times Scale Plus Index Register
5336 5379 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5337 5380 match(AddP reg (LShiftI ireg scale));
5338 5381
5339 5382 op_cost(10);
5340 5383 format %{"[$reg + $ireg << $scale]" %}
5341 5384 interface(MEMORY_INTER) %{
5342 5385 base($reg);
5343 5386 index($ireg);
5344 5387 scale($scale);
5345 5388 disp(0x0);
5346 5389 %}
5347 5390 %}
5348 5391
5349 5392 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5350 5393 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5351 5394 match(AddP (AddP reg (LShiftI ireg scale)) off);
5352 5395
5353 5396 op_cost(10);
5354 5397 format %{"[$reg + $off + $ireg << $scale]" %}
5355 5398 interface(MEMORY_INTER) %{
5356 5399 base($reg);
5357 5400 index($ireg);
5358 5401 scale($scale);
5359 5402 disp($off);
5360 5403 %}
5361 5404 %}
5362 5405
5363 5406 //----------Load Long Memory Operands------------------------------------------
5364 5407 // The load-long idiom will use it's address expression again after loading
5365 5408 // the first word of the long. If the load-long destination overlaps with
5366 5409 // registers used in the addressing expression, the 2nd half will be loaded
5367 5410 // from a clobbered address. Fix this by requiring that load-long use
5368 5411 // address registers that do not overlap with the load-long target.
5369 5412
5370 5413 // load-long support
5371 5414 operand load_long_RegP() %{
5372 5415 constraint(ALLOC_IN_RC(esi_reg));
5373 5416 match(RegP);
5374 5417 match(eSIRegP);
5375 5418 op_cost(100);
5376 5419 format %{ %}
5377 5420 interface(REG_INTER);
5378 5421 %}
5379 5422
5380 5423 // Indirect Memory Operand Long
5381 5424 operand load_long_indirect(load_long_RegP reg) %{
5382 5425 constraint(ALLOC_IN_RC(esi_reg));
5383 5426 match(reg);
5384 5427
5385 5428 format %{ "[$reg]" %}
5386 5429 interface(MEMORY_INTER) %{
5387 5430 base($reg);
5388 5431 index(0x4);
5389 5432 scale(0x0);
5390 5433 disp(0x0);
5391 5434 %}
5392 5435 %}
5393 5436
5394 5437 // Indirect Memory Plus Long Offset Operand
5395 5438 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5396 5439 match(AddP reg off);
5397 5440
5398 5441 format %{ "[$reg + $off]" %}
5399 5442 interface(MEMORY_INTER) %{
5400 5443 base($reg);
5401 5444 index(0x4);
5402 5445 scale(0x0);
5403 5446 disp($off);
5404 5447 %}
5405 5448 %}
5406 5449
5407 5450 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5408 5451
5409 5452
5410 5453 //----------Special Memory Operands--------------------------------------------
5411 5454 // Stack Slot Operand - This operand is used for loading and storing temporary
5412 5455 // values on the stack where a match requires a value to
5413 5456 // flow through memory.
5414 5457 operand stackSlotP(sRegP reg) %{
5415 5458 constraint(ALLOC_IN_RC(stack_slots));
5416 5459 // No match rule because this operand is only generated in matching
5417 5460 format %{ "[$reg]" %}
5418 5461 interface(MEMORY_INTER) %{
5419 5462 base(0x4); // ESP
5420 5463 index(0x4); // No Index
5421 5464 scale(0x0); // No Scale
5422 5465 disp($reg); // Stack Offset
5423 5466 %}
5424 5467 %}
5425 5468
5426 5469 operand stackSlotI(sRegI reg) %{
5427 5470 constraint(ALLOC_IN_RC(stack_slots));
5428 5471 // No match rule because this operand is only generated in matching
5429 5472 format %{ "[$reg]" %}
5430 5473 interface(MEMORY_INTER) %{
5431 5474 base(0x4); // ESP
5432 5475 index(0x4); // No Index
5433 5476 scale(0x0); // No Scale
5434 5477 disp($reg); // Stack Offset
5435 5478 %}
5436 5479 %}
5437 5480
5438 5481 operand stackSlotF(sRegF reg) %{
5439 5482 constraint(ALLOC_IN_RC(stack_slots));
5440 5483 // No match rule because this operand is only generated in matching
5441 5484 format %{ "[$reg]" %}
5442 5485 interface(MEMORY_INTER) %{
5443 5486 base(0x4); // ESP
5444 5487 index(0x4); // No Index
5445 5488 scale(0x0); // No Scale
5446 5489 disp($reg); // Stack Offset
5447 5490 %}
5448 5491 %}
5449 5492
5450 5493 operand stackSlotD(sRegD reg) %{
5451 5494 constraint(ALLOC_IN_RC(stack_slots));
5452 5495 // No match rule because this operand is only generated in matching
5453 5496 format %{ "[$reg]" %}
5454 5497 interface(MEMORY_INTER) %{
5455 5498 base(0x4); // ESP
5456 5499 index(0x4); // No Index
5457 5500 scale(0x0); // No Scale
5458 5501 disp($reg); // Stack Offset
5459 5502 %}
5460 5503 %}
5461 5504
5462 5505 operand stackSlotL(sRegL reg) %{
5463 5506 constraint(ALLOC_IN_RC(stack_slots));
5464 5507 // No match rule because this operand is only generated in matching
5465 5508 format %{ "[$reg]" %}
5466 5509 interface(MEMORY_INTER) %{
5467 5510 base(0x4); // ESP
5468 5511 index(0x4); // No Index
5469 5512 scale(0x0); // No Scale
5470 5513 disp($reg); // Stack Offset
5471 5514 %}
5472 5515 %}
5473 5516
5474 5517 //----------Memory Operands - Win95 Implicit Null Variants----------------
5475 5518 // Indirect Memory Operand
5476 5519 operand indirect_win95_safe(eRegP_no_EBP reg)
5477 5520 %{
5478 5521 constraint(ALLOC_IN_RC(e_reg));
5479 5522 match(reg);
5480 5523
5481 5524 op_cost(100);
5482 5525 format %{ "[$reg]" %}
5483 5526 interface(MEMORY_INTER) %{
5484 5527 base($reg);
5485 5528 index(0x4);
5486 5529 scale(0x0);
5487 5530 disp(0x0);
5488 5531 %}
5489 5532 %}
5490 5533
5491 5534 // Indirect Memory Plus Short Offset Operand
5492 5535 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5493 5536 %{
5494 5537 match(AddP reg off);
5495 5538
5496 5539 op_cost(100);
5497 5540 format %{ "[$reg + $off]" %}
5498 5541 interface(MEMORY_INTER) %{
5499 5542 base($reg);
5500 5543 index(0x4);
5501 5544 scale(0x0);
5502 5545 disp($off);
5503 5546 %}
5504 5547 %}
5505 5548
5506 5549 // Indirect Memory Plus Long Offset Operand
5507 5550 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5508 5551 %{
5509 5552 match(AddP reg off);
5510 5553
5511 5554 op_cost(100);
5512 5555 format %{ "[$reg + $off]" %}
5513 5556 interface(MEMORY_INTER) %{
5514 5557 base($reg);
5515 5558 index(0x4);
5516 5559 scale(0x0);
5517 5560 disp($off);
5518 5561 %}
5519 5562 %}
5520 5563
5521 5564 // Indirect Memory Plus Index Register Plus Offset Operand
5522 5565 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5523 5566 %{
5524 5567 match(AddP (AddP reg ireg) off);
5525 5568
5526 5569 op_cost(100);
5527 5570 format %{"[$reg + $off + $ireg]" %}
5528 5571 interface(MEMORY_INTER) %{
5529 5572 base($reg);
5530 5573 index($ireg);
5531 5574 scale(0x0);
5532 5575 disp($off);
5533 5576 %}
5534 5577 %}
5535 5578
5536 5579 // Indirect Memory Times Scale Plus Index Register
5537 5580 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5538 5581 %{
5539 5582 match(AddP reg (LShiftI ireg scale));
5540 5583
5541 5584 op_cost(100);
5542 5585 format %{"[$reg + $ireg << $scale]" %}
5543 5586 interface(MEMORY_INTER) %{
5544 5587 base($reg);
5545 5588 index($ireg);
5546 5589 scale($scale);
5547 5590 disp(0x0);
5548 5591 %}
5549 5592 %}
5550 5593
5551 5594 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5552 5595 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5553 5596 %{
5554 5597 match(AddP (AddP reg (LShiftI ireg scale)) off);
5555 5598
5556 5599 op_cost(100);
5557 5600 format %{"[$reg + $off + $ireg << $scale]" %}
5558 5601 interface(MEMORY_INTER) %{
5559 5602 base($reg);
5560 5603 index($ireg);
5561 5604 scale($scale);
5562 5605 disp($off);
5563 5606 %}
5564 5607 %}
5565 5608
5566 5609 //----------Conditional Branch Operands----------------------------------------
5567 5610 // Comparison Op - This is the operation of the comparison, and is limited to
5568 5611 // the following set of codes:
5569 5612 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5570 5613 //
5571 5614 // Other attributes of the comparison, such as unsignedness, are specified
5572 5615 // by the comparison instruction that sets a condition code flags register.
5573 5616 // That result is represented by a flags operand whose subtype is appropriate
5574 5617 // to the unsignedness (etc.) of the comparison.
5575 5618 //
5576 5619 // Later, the instruction which matches both the Comparison Op (a Bool) and
5577 5620 // the flags (produced by the Cmp) specifies the coding of the comparison op
5578 5621 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5579 5622
// Comparison Code
5581 5624 operand cmpOp() %{
5582 5625 match(Bool);
5583 5626
5584 5627 format %{ "" %}
5585 5628 interface(COND_INTER) %{
5586 5629 equal(0x4, "e");
5587 5630 not_equal(0x5, "ne");
5588 5631 less(0xC, "l");
5589 5632 greater_equal(0xD, "ge");
5590 5633 less_equal(0xE, "le");
5591 5634 greater(0xF, "g");
5592 5635 %}
5593 5636 %}
5594 5637
5595 5638 // Comparison Code, unsigned compare. Used by FP also, with
5596 5639 // C2 (unordered) turned into GT or LT already. The other bits
5597 5640 // C0 and C3 are turned into Carry & Zero flags.
5598 5641 operand cmpOpU() %{
5599 5642 match(Bool);
5600 5643
5601 5644 format %{ "" %}
5602 5645 interface(COND_INTER) %{
5603 5646 equal(0x4, "e");
5604 5647 not_equal(0x5, "ne");
5605 5648 less(0x2, "b");
5606 5649 greater_equal(0x3, "nb");
5607 5650 less_equal(0x6, "be");
5608 5651 greater(0x7, "nbe");
5609 5652 %}
5610 5653 %}
5611 5654
5612 5655 // Floating comparisons that don't require any fixup for the unordered case
5613 5656 operand cmpOpUCF() %{
5614 5657 match(Bool);
5615 5658 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5616 5659 n->as_Bool()->_test._test == BoolTest::ge ||
5617 5660 n->as_Bool()->_test._test == BoolTest::le ||
5618 5661 n->as_Bool()->_test._test == BoolTest::gt);
5619 5662 format %{ "" %}
5620 5663 interface(COND_INTER) %{
5621 5664 equal(0x4, "e");
5622 5665 not_equal(0x5, "ne");
5623 5666 less(0x2, "b");
5624 5667 greater_equal(0x3, "nb");
5625 5668 less_equal(0x6, "be");
5626 5669 greater(0x7, "nbe");
5627 5670 %}
5628 5671 %}
5629 5672
5630 5673
5631 5674 // Floating comparisons that can be fixed up with extra conditional jumps
5632 5675 operand cmpOpUCF2() %{
5633 5676 match(Bool);
5634 5677 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5635 5678 n->as_Bool()->_test._test == BoolTest::eq);
5636 5679 format %{ "" %}
5637 5680 interface(COND_INTER) %{
5638 5681 equal(0x4, "e");
5639 5682 not_equal(0x5, "ne");
5640 5683 less(0x2, "b");
5641 5684 greater_equal(0x3, "nb");
5642 5685 less_equal(0x6, "be");
5643 5686 greater(0x7, "nbe");
5644 5687 %}
5645 5688 %}
5646 5689
5647 5690 // Comparison Code for FP conditional move
5648 5691 operand cmpOp_fcmov() %{
5649 5692 match(Bool);
5650 5693
5651 5694 format %{ "" %}
5652 5695 interface(COND_INTER) %{
5653 5696 equal (0x0C8);
5654 5697 not_equal (0x1C8);
5655 5698 less (0x0C0);
5656 5699 greater_equal(0x1C0);
5657 5700 less_equal (0x0D0);
5658 5701 greater (0x1D0);
5659 5702 %}
5660 5703 %}
5661 5704
// Comparison Code used in long compares
5663 5706 operand cmpOp_commute() %{
5664 5707 match(Bool);
5665 5708
5666 5709 format %{ "" %}
5667 5710 interface(COND_INTER) %{
5668 5711 equal(0x4, "e");
5669 5712 not_equal(0x5, "ne");
5670 5713 less(0xF, "g");
5671 5714 greater_equal(0xE, "le");
5672 5715 less_equal(0xD, "ge");
5673 5716 greater(0xC, "l");
5674 5717 %}
5675 5718 %}
5676 5719
5677 5720 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5679 5722 // instruction definitions by not requiring the AD writer to specify separate
5680 5723 // instructions for every form of operand when the instruction accepts
5681 5724 // multiple operand types with the same basic encoding and format. The classic
5682 5725 // case of this is memory operands.
5683 5726
5684 5727 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5685 5728 indIndex, indIndexScale, indIndexScaleOffset);
5686 5729
5687 5730 // Long memory operations are encoded in 2 instructions and a +4 offset.
5688 5731 // This means some kind of offset is always required and you cannot use
5689 5732 // an oop as the offset (done when working on static globals).
5690 5733 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5691 5734 indIndex, indIndexScale, indIndexScaleOffset);
5692 5735
5693 5736
5694 5737 //----------PIPELINE-----------------------------------------------------------
5695 5738 // Rules which define the behavior of the target architectures pipeline.
5696 5739 pipeline %{
5697 5740
5698 5741 //----------ATTRIBUTES---------------------------------------------------------
5699 5742 attributes %{
5700 5743 variable_size_instructions; // Fixed size instructions
5701 5744 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5702 5745 instruction_unit_size = 1; // An instruction is 1 bytes long
5703 5746 instruction_fetch_unit_size = 16; // The processor fetches one line
5704 5747 instruction_fetch_units = 1; // of 16 bytes
5705 5748
5706 5749 // List of nop instructions
5707 5750 nops( MachNop );
5708 5751 %}
5709 5752
5710 5753 //----------RESOURCES----------------------------------------------------------
5711 5754 // Resources are the functional units available to the machine
5712 5755
5713 5756 // Generic P2/P3 pipeline
5714 5757 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5715 5758 // 3 instructions decoded per cycle.
5716 5759 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5717 5760 // 2 ALU op, only ALU0 handles mul/div instructions.
5718 5761 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5719 5762 MS0, MS1, MEM = MS0 | MS1,
5720 5763 BR, FPU,
5721 5764 ALU0, ALU1, ALU = ALU0 | ALU1 );
5722 5765
5723 5766 //----------PIPELINE DESCRIPTION-----------------------------------------------
5724 5767 // Pipeline Description specifies the stages in the machine's pipeline
5725 5768
5726 5769 // Generic P2/P3 pipeline
5727 5770 pipe_desc(S0, S1, S2, S3, S4, S5);
5728 5771
5729 5772 //----------PIPELINE CLASSES---------------------------------------------------
5730 5773 // Pipeline Classes describe the stages in which input and output are
5731 5774 // referenced by the hardware pipeline.
5732 5775
5733 5776 // Naming convention: ialu or fpu
5734 5777 // Then: _reg
5735 5778 // Then: _reg if there is a 2nd register
5736 5779 // Then: _long if it's a pair of instructions implementing a long
5737 5780 // Then: _fat if it requires the big decoder
5738 5781 // Or: _mem if it requires the big decoder and a memory unit.
5739 5782
5740 5783 // Integer ALU reg operation
5741 5784 pipe_class ialu_reg(eRegI dst) %{
5742 5785 single_instruction;
5743 5786 dst : S4(write);
5744 5787 dst : S3(read);
5745 5788 DECODE : S0; // any decoder
5746 5789 ALU : S3; // any alu
5747 5790 %}
5748 5791
5749 5792 // Long ALU reg operation
5750 5793 pipe_class ialu_reg_long(eRegL dst) %{
5751 5794 instruction_count(2);
5752 5795 dst : S4(write);
5753 5796 dst : S3(read);
5754 5797 DECODE : S0(2); // any 2 decoders
5755 5798 ALU : S3(2); // both alus
5756 5799 %}
5757 5800
5758 5801 // Integer ALU reg operation using big decoder
5759 5802 pipe_class ialu_reg_fat(eRegI dst) %{
5760 5803 single_instruction;
5761 5804 dst : S4(write);
5762 5805 dst : S3(read);
5763 5806 D0 : S0; // big decoder only
5764 5807 ALU : S3; // any alu
5765 5808 %}
5766 5809
5767 5810 // Long ALU reg operation using big decoder
5768 5811 pipe_class ialu_reg_long_fat(eRegL dst) %{
5769 5812 instruction_count(2);
5770 5813 dst : S4(write);
5771 5814 dst : S3(read);
5772 5815 D0 : S0(2); // big decoder only; twice
5773 5816 ALU : S3(2); // any 2 alus
5774 5817 %}
5775 5818
5776 5819 // Integer ALU reg-reg operation
5777 5820 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5778 5821 single_instruction;
5779 5822 dst : S4(write);
5780 5823 src : S3(read);
5781 5824 DECODE : S0; // any decoder
5782 5825 ALU : S3; // any alu
5783 5826 %}
5784 5827
5785 5828 // Long ALU reg-reg operation
5786 5829 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5787 5830 instruction_count(2);
5788 5831 dst : S4(write);
5789 5832 src : S3(read);
5790 5833 DECODE : S0(2); // any 2 decoders
5791 5834 ALU : S3(2); // both alus
5792 5835 %}
5793 5836
5794 5837 // Integer ALU reg-reg operation
5795 5838 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5796 5839 single_instruction;
5797 5840 dst : S4(write);
5798 5841 src : S3(read);
5799 5842 D0 : S0; // big decoder only
5800 5843 ALU : S3; // any alu
5801 5844 %}
5802 5845
5803 5846 // Long ALU reg-reg operation
5804 5847 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5805 5848 instruction_count(2);
5806 5849 dst : S4(write);
5807 5850 src : S3(read);
5808 5851 D0 : S0(2); // big decoder only; twice
5809 5852 ALU : S3(2); // both alus
5810 5853 %}
5811 5854
5812 5855 // Integer ALU reg-mem operation
5813 5856 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5814 5857 single_instruction;
5815 5858 dst : S5(write);
5816 5859 mem : S3(read);
5817 5860 D0 : S0; // big decoder only
5818 5861 ALU : S4; // any alu
5819 5862 MEM : S3; // any mem
5820 5863 %}
5821 5864
5822 5865 // Long ALU reg-mem operation
5823 5866 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5824 5867 instruction_count(2);
5825 5868 dst : S5(write);
5826 5869 mem : S3(read);
5827 5870 D0 : S0(2); // big decoder only; twice
5828 5871 ALU : S4(2); // any 2 alus
5829 5872 MEM : S3(2); // both mems
5830 5873 %}
5831 5874
5832 5875 // Integer mem operation (prefetch)
5833 5876 pipe_class ialu_mem(memory mem)
5834 5877 %{
5835 5878 single_instruction;
5836 5879 mem : S3(read);
5837 5880 D0 : S0; // big decoder only
5838 5881 MEM : S3; // any mem
5839 5882 %}
5840 5883
5841 5884 // Integer Store to Memory
5842 5885 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5843 5886 single_instruction;
5844 5887 mem : S3(read);
5845 5888 src : S5(read);
5846 5889 D0 : S0; // big decoder only
5847 5890 ALU : S4; // any alu
5848 5891 MEM : S3;
5849 5892 %}
5850 5893
5851 5894 // Long Store to Memory
5852 5895 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5853 5896 instruction_count(2);
5854 5897 mem : S3(read);
5855 5898 src : S5(read);
5856 5899 D0 : S0(2); // big decoder only; twice
5857 5900 ALU : S4(2); // any 2 alus
5858 5901 MEM : S3(2); // Both mems
5859 5902 %}
5860 5903
5861 5904 // Integer Store to Memory
5862 5905 pipe_class ialu_mem_imm(memory mem) %{
5863 5906 single_instruction;
5864 5907 mem : S3(read);
5865 5908 D0 : S0; // big decoder only
5866 5909 ALU : S4; // any alu
5867 5910 MEM : S3;
5868 5911 %}
5869 5912
5870 5913 // Integer ALU0 reg-reg operation
5871 5914 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5872 5915 single_instruction;
5873 5916 dst : S4(write);
5874 5917 src : S3(read);
5875 5918 D0 : S0; // Big decoder only
5876 5919 ALU0 : S3; // only alu0
5877 5920 %}
5878 5921
5879 5922 // Integer ALU0 reg-mem operation
5880 5923 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
5881 5924 single_instruction;
5882 5925 dst : S5(write);
5883 5926 mem : S3(read);
5884 5927 D0 : S0; // big decoder only
5885 5928 ALU0 : S4; // ALU0 only
5886 5929 MEM : S3; // any mem
5887 5930 %}
5888 5931
5889 5932 // Integer ALU reg-reg operation
5890 5933 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
5891 5934 single_instruction;
5892 5935 cr : S4(write);
5893 5936 src1 : S3(read);
5894 5937 src2 : S3(read);
5895 5938 DECODE : S0; // any decoder
5896 5939 ALU : S3; // any alu
5897 5940 %}
5898 5941
5899 5942 // Integer ALU reg-imm operation
5900 5943 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
5901 5944 single_instruction;
5902 5945 cr : S4(write);
5903 5946 src1 : S3(read);
5904 5947 DECODE : S0; // any decoder
5905 5948 ALU : S3; // any alu
5906 5949 %}
5907 5950
5908 5951 // Integer ALU reg-mem operation
5909 5952 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
5910 5953 single_instruction;
5911 5954 cr : S4(write);
5912 5955 src1 : S3(read);
5913 5956 src2 : S3(read);
5914 5957 D0 : S0; // big decoder only
5915 5958 ALU : S4; // any alu
5916 5959 MEM : S3;
5917 5960 %}
5918 5961
5919 5962 // Conditional move reg-reg
5920 5963 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
5921 5964 instruction_count(4);
5922 5965 y : S4(read);
5923 5966 q : S3(read);
5924 5967 p : S3(read);
5925 5968 DECODE : S0(4); // any decoder
5926 5969 %}
5927 5970
5928 5971 // Conditional move reg-reg
5929 5972 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
5930 5973 single_instruction;
5931 5974 dst : S4(write);
5932 5975 src : S3(read);
5933 5976 cr : S3(read);
5934 5977 DECODE : S0; // any decoder
5935 5978 %}
5936 5979
5937 5980 // Conditional move reg-mem
5938 5981 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5939 5982 single_instruction;
5940 5983 dst : S4(write);
5941 5984 src : S3(read);
5942 5985 cr : S3(read);
5943 5986 DECODE : S0; // any decoder
5944 5987 MEM : S3;
5945 5988 %}
5946 5989
5947 5990 // Conditional move reg-reg long
5948 5991 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5949 5992 single_instruction;
5950 5993 dst : S4(write);
5951 5994 src : S3(read);
5952 5995 cr : S3(read);
5953 5996 DECODE : S0(2); // any 2 decoders
5954 5997 %}
5955 5998
5956 5999 // Conditional move double reg-reg
5957 6000 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
5958 6001 single_instruction;
5959 6002 dst : S4(write);
5960 6003 src : S3(read);
5961 6004 cr : S3(read);
5962 6005 DECODE : S0; // any decoder
5963 6006 %}
5964 6007
5965 6008 // Float reg-reg operation
5966 6009 pipe_class fpu_reg(regD dst) %{
5967 6010 instruction_count(2);
5968 6011 dst : S3(read);
5969 6012 DECODE : S0(2); // any 2 decoders
5970 6013 FPU : S3;
5971 6014 %}
5972 6015
5973 6016 // Float reg-reg operation
5974 6017 pipe_class fpu_reg_reg(regD dst, regD src) %{
5975 6018 instruction_count(2);
5976 6019 dst : S4(write);
5977 6020 src : S3(read);
5978 6021 DECODE : S0(2); // any 2 decoders
5979 6022 FPU : S3;
5980 6023 %}
5981 6024
5982 6025 // Float reg-reg operation
5983 6026 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
5984 6027 instruction_count(3);
5985 6028 dst : S4(write);
5986 6029 src1 : S3(read);
5987 6030 src2 : S3(read);
5988 6031 DECODE : S0(3); // any 3 decoders
5989 6032 FPU : S3(2);
5990 6033 %}
5991 6034
5992 6035 // Float reg-reg operation
5993 6036 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
5994 6037 instruction_count(4);
5995 6038 dst : S4(write);
5996 6039 src1 : S3(read);
5997 6040 src2 : S3(read);
5998 6041 src3 : S3(read);
5999 6042 DECODE : S0(4); // any 3 decoders
6000 6043 FPU : S3(2);
6001 6044 %}
6002 6045
6003 6046 // Float reg-reg operation
6004 6047 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6005 6048 instruction_count(4);
6006 6049 dst : S4(write);
6007 6050 src1 : S3(read);
6008 6051 src2 : S3(read);
6009 6052 src3 : S3(read);
6010 6053 DECODE : S1(3); // any 3 decoders
6011 6054 D0 : S0; // Big decoder only
6012 6055 FPU : S3(2);
6013 6056 MEM : S3;
6014 6057 %}
6015 6058
6016 6059 // Float reg-mem operation
6017 6060 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6018 6061 instruction_count(2);
6019 6062 dst : S5(write);
6020 6063 mem : S3(read);
6021 6064 D0 : S0; // big decoder only
6022 6065 DECODE : S1; // any decoder for FPU POP
6023 6066 FPU : S4;
6024 6067 MEM : S3; // any mem
6025 6068 %}
6026 6069
6027 6070 // Float reg-mem operation
6028 6071 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6029 6072 instruction_count(3);
6030 6073 dst : S5(write);
6031 6074 src1 : S3(read);
6032 6075 mem : S3(read);
6033 6076 D0 : S0; // big decoder only
6034 6077 DECODE : S1(2); // any decoder for FPU POP
6035 6078 FPU : S4;
6036 6079 MEM : S3; // any mem
6037 6080 %}
6038 6081
6039 6082 // Float mem-reg operation
6040 6083 pipe_class fpu_mem_reg(memory mem, regD src) %{
6041 6084 instruction_count(2);
6042 6085 src : S5(read);
6043 6086 mem : S3(read);
6044 6087 DECODE : S0; // any decoder for FPU PUSH
6045 6088 D0 : S1; // big decoder only
6046 6089 FPU : S4;
6047 6090 MEM : S3; // any mem
6048 6091 %}
6049 6092
6050 6093 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6051 6094 instruction_count(3);
6052 6095 src1 : S3(read);
6053 6096 src2 : S3(read);
6054 6097 mem : S3(read);
6055 6098 DECODE : S0(2); // any decoder for FPU PUSH
6056 6099 D0 : S1; // big decoder only
6057 6100 FPU : S4;
6058 6101 MEM : S3; // any mem
6059 6102 %}
6060 6103
6061 6104 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6062 6105 instruction_count(3);
6063 6106 src1 : S3(read);
6064 6107 src2 : S3(read);
6065 6108 mem : S4(read);
6066 6109 DECODE : S0; // any decoder for FPU PUSH
6067 6110 D0 : S0(2); // big decoder only
6068 6111 FPU : S4;
6069 6112 MEM : S3(2); // any mem
6070 6113 %}
6071 6114
6072 6115 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6073 6116 instruction_count(2);
6074 6117 src1 : S3(read);
6075 6118 dst : S4(read);
6076 6119 D0 : S0(2); // big decoder only
6077 6120 MEM : S3(2); // any mem
6078 6121 %}
6079 6122
6080 6123 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6081 6124 instruction_count(3);
6082 6125 src1 : S3(read);
6083 6126 src2 : S3(read);
6084 6127 dst : S4(read);
6085 6128 D0 : S0(3); // big decoder only
6086 6129 FPU : S4;
6087 6130 MEM : S3(3); // any mem
6088 6131 %}
6089 6132
6090 6133 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6091 6134 instruction_count(3);
6092 6135 src1 : S4(read);
6093 6136 mem : S4(read);
6094 6137 DECODE : S0; // any decoder for FPU PUSH
6095 6138 D0 : S0(2); // big decoder only
6096 6139 FPU : S4;
6097 6140 MEM : S3(2); // any mem
6098 6141 %}
6099 6142
6100 6143 // Float load constant
6101 6144 pipe_class fpu_reg_con(regD dst) %{
6102 6145 instruction_count(2);
6103 6146 dst : S5(write);
6104 6147 D0 : S0; // big decoder only for the load
6105 6148 DECODE : S1; // any decoder for FPU POP
6106 6149 FPU : S4;
6107 6150 MEM : S3; // any mem
6108 6151 %}
6109 6152
6110 6153 // Float load constant
6111 6154 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6112 6155 instruction_count(3);
6113 6156 dst : S5(write);
6114 6157 src : S3(read);
6115 6158 D0 : S0; // big decoder only for the load
6116 6159 DECODE : S1(2); // any decoder for FPU POP
6117 6160 FPU : S4;
6118 6161 MEM : S3; // any mem
6119 6162 %}
6120 6163
6121 6164 // UnConditional branch
6122 6165 pipe_class pipe_jmp( label labl ) %{
6123 6166 single_instruction;
6124 6167 BR : S3;
6125 6168 %}
6126 6169
6127 6170 // Conditional branch
6128 6171 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6129 6172 single_instruction;
6130 6173 cr : S1(read);
6131 6174 BR : S3;
6132 6175 %}
6133 6176
6134 6177 // Allocation idiom
6135 6178 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6136 6179 instruction_count(1); force_serialization;
6137 6180 fixed_latency(6);
6138 6181 heap_ptr : S3(read);
6139 6182 DECODE : S0(3);
6140 6183 D0 : S2;
6141 6184 MEM : S3;
6142 6185 ALU : S3(2);
6143 6186 dst : S5(write);
6144 6187 BR : S5;
6145 6188 %}
6146 6189
6147 6190 // Generic big/slow expanded idiom
6148 6191 pipe_class pipe_slow( ) %{
6149 6192 instruction_count(10); multiple_bundles; force_serialization;
6150 6193 fixed_latency(100);
6151 6194 D0 : S0(2);
6152 6195 MEM : S3(2);
6153 6196 %}
6154 6197
6155 6198 // The real do-nothing guy
6156 6199 pipe_class empty( ) %{
6157 6200 instruction_count(0);
6158 6201 %}
6159 6202
6160 6203 // Define the class for the Nop node
6161 6204 define %{
6162 6205 MachNop = empty;
6163 6206 %}
6164 6207
6165 6208 %}
6166 6209
6167 6210 //----------INSTRUCTIONS-------------------------------------------------------
6168 6211 //
6169 6212 // match -- States which machine-independent subtree may be replaced
6170 6213 // by this instruction.
6171 6214 // ins_cost -- The estimated cost of this instruction is used by instruction
6172 6215 // selection to identify a minimum cost tree of machine
6173 6216 // instructions that matches a tree of machine-independent
6174 6217 // instructions.
6175 6218 // format -- A string providing the disassembly for this instruction.
6176 6219 // The value of an instruction's operand may be inserted
6177 6220 // by referring to it with a '$' prefix.
6178 6221 // opcode -- Three instruction opcodes may be provided. These are referred
6179 6222 // to within an encode class as $primary, $secondary, and $tertiary
6180 6223 // respectively. The primary opcode is commonly used to
6181 6224 // indicate the type of machine instruction, while secondary
6182 6225 // and tertiary are often used for prefix options or addressing
6183 6226 // modes.
6184 6227 // ins_encode -- A list of encode classes with parameters. The encode class
6185 6228 // name must have been defined in an 'enc_class' specification
6186 6229 // in the encode section of the architecture description.
6187 6230
6188 6231 //----------BSWAP-Instruction--------------------------------------------------
6189 6232 instruct bytes_reverse_int(eRegI dst) %{
6190 6233 match(Set dst (ReverseBytesI dst));
6191 6234
6192 6235 format %{ "BSWAP $dst" %}
6193 6236 opcode(0x0F, 0xC8);
6194 6237 ins_encode( OpcP, OpcSReg(dst) );
6195 6238 ins_pipe( ialu_reg );
6196 6239 %}
6197 6240
6198 6241 instruct bytes_reverse_long(eRegL dst) %{
6199 6242 match(Set dst (ReverseBytesL dst));
6200 6243
6201 6244 format %{ "BSWAP $dst.lo\n\t"
6202 6245 "BSWAP $dst.hi\n\t"
6203 6246 "XCHG $dst.lo $dst.hi" %}
6204 6247
6205 6248 ins_cost(125);
6206 6249 ins_encode( bswap_long_bytes(dst) );
6207 6250 ins_pipe( ialu_reg_reg);
6208 6251 %}
6209 6252
6210 6253
6211 6254 //---------- Zeros Count Instructions ------------------------------------------
6212 6255
6213 6256 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6214 6257 predicate(UseCountLeadingZerosInstruction);
6215 6258 match(Set dst (CountLeadingZerosI src));
6216 6259 effect(KILL cr);
6217 6260
6218 6261 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6219 6262 ins_encode %{
6220 6263 __ lzcntl($dst$$Register, $src$$Register);
6221 6264 %}
6222 6265 ins_pipe(ialu_reg);
6223 6266 %}
6224 6267
6225 6268 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6226 6269 predicate(!UseCountLeadingZerosInstruction);
6227 6270 match(Set dst (CountLeadingZerosI src));
6228 6271 effect(KILL cr);
6229 6272
6230 6273 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6231 6274 "JNZ skip\n\t"
6232 6275 "MOV $dst, -1\n"
6233 6276 "skip:\n\t"
6234 6277 "NEG $dst\n\t"
6235 6278 "ADD $dst, 31" %}
6236 6279 ins_encode %{
6237 6280 Register Rdst = $dst$$Register;
6238 6281 Register Rsrc = $src$$Register;
6239 6282 Label skip;
6240 6283 __ bsrl(Rdst, Rsrc);
6241 6284 __ jccb(Assembler::notZero, skip);
6242 6285 __ movl(Rdst, -1);
6243 6286 __ bind(skip);
6244 6287 __ negl(Rdst);
6245 6288 __ addl(Rdst, BitsPerInt - 1);
6246 6289 %}
6247 6290 ins_pipe(ialu_reg);
6248 6291 %}
6249 6292
6250 6293 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6251 6294 predicate(UseCountLeadingZerosInstruction);
6252 6295 match(Set dst (CountLeadingZerosL src));
6253 6296 effect(TEMP dst, KILL cr);
6254 6297
6255 6298 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6256 6299 "JNC done\n\t"
6257 6300 "LZCNT $dst, $src.lo\n\t"
6258 6301 "ADD $dst, 32\n"
6259 6302 "done:" %}
6260 6303 ins_encode %{
6261 6304 Register Rdst = $dst$$Register;
6262 6305 Register Rsrc = $src$$Register;
6263 6306 Label done;
6264 6307 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6265 6308 __ jccb(Assembler::carryClear, done);
6266 6309 __ lzcntl(Rdst, Rsrc);
6267 6310 __ addl(Rdst, BitsPerInt);
6268 6311 __ bind(done);
6269 6312 %}
6270 6313 ins_pipe(ialu_reg);
6271 6314 %}
6272 6315
6273 6316 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6274 6317 predicate(!UseCountLeadingZerosInstruction);
6275 6318 match(Set dst (CountLeadingZerosL src));
6276 6319 effect(TEMP dst, KILL cr);
6277 6320
6278 6321 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6279 6322 "JZ msw_is_zero\n\t"
6280 6323 "ADD $dst, 32\n\t"
6281 6324 "JMP not_zero\n"
6282 6325 "msw_is_zero:\n\t"
6283 6326 "BSR $dst, $src.lo\n\t"
6284 6327 "JNZ not_zero\n\t"
6285 6328 "MOV $dst, -1\n"
6286 6329 "not_zero:\n\t"
6287 6330 "NEG $dst\n\t"
6288 6331 "ADD $dst, 63\n" %}
6289 6332 ins_encode %{
6290 6333 Register Rdst = $dst$$Register;
6291 6334 Register Rsrc = $src$$Register;
6292 6335 Label msw_is_zero;
6293 6336 Label not_zero;
6294 6337 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6295 6338 __ jccb(Assembler::zero, msw_is_zero);
6296 6339 __ addl(Rdst, BitsPerInt);
6297 6340 __ jmpb(not_zero);
6298 6341 __ bind(msw_is_zero);
6299 6342 __ bsrl(Rdst, Rsrc);
6300 6343 __ jccb(Assembler::notZero, not_zero);
6301 6344 __ movl(Rdst, -1);
6302 6345 __ bind(not_zero);
6303 6346 __ negl(Rdst);
6304 6347 __ addl(Rdst, BitsPerLong - 1);
6305 6348 %}
6306 6349 ins_pipe(ialu_reg);
6307 6350 %}
6308 6351
6309 6352 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6310 6353 match(Set dst (CountTrailingZerosI src));
6311 6354 effect(KILL cr);
6312 6355
6313 6356 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6314 6357 "JNZ done\n\t"
6315 6358 "MOV $dst, 32\n"
6316 6359 "done:" %}
6317 6360 ins_encode %{
6318 6361 Register Rdst = $dst$$Register;
6319 6362 Label done;
6320 6363 __ bsfl(Rdst, $src$$Register);
6321 6364 __ jccb(Assembler::notZero, done);
6322 6365 __ movl(Rdst, BitsPerInt);
6323 6366 __ bind(done);
6324 6367 %}
6325 6368 ins_pipe(ialu_reg);
6326 6369 %}
6327 6370
6328 6371 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6329 6372 match(Set dst (CountTrailingZerosL src));
6330 6373 effect(TEMP dst, KILL cr);
6331 6374
6332 6375 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6333 6376 "JNZ done\n\t"
6334 6377 "BSF $dst, $src.hi\n\t"
6335 6378 "JNZ msw_not_zero\n\t"
6336 6379 "MOV $dst, 32\n"
6337 6380 "msw_not_zero:\n\t"
6338 6381 "ADD $dst, 32\n"
6339 6382 "done:" %}
6340 6383 ins_encode %{
6341 6384 Register Rdst = $dst$$Register;
6342 6385 Register Rsrc = $src$$Register;
6343 6386 Label msw_not_zero;
6344 6387 Label done;
6345 6388 __ bsfl(Rdst, Rsrc);
6346 6389 __ jccb(Assembler::notZero, done);
6347 6390 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6348 6391 __ jccb(Assembler::notZero, msw_not_zero);
6349 6392 __ movl(Rdst, BitsPerInt);
6350 6393 __ bind(msw_not_zero);
6351 6394 __ addl(Rdst, BitsPerInt);
6352 6395 __ bind(done);
6353 6396 %}
6354 6397 ins_pipe(ialu_reg);
6355 6398 %}
6356 6399
6357 6400
//---------- Population Count Instructions -------------------------------------

// Count the set bits of an int with the POPCNT instruction.
// Only matched when the VM flag UsePopCountInstruction is enabled.
instruct popCountI(eRegI dst, eRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// PopCountI with a memory source: folds the load into POPCNT.
instruct popCountI_mem(eRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// On ia32 a long occupies a register pair, so pop-count each 32-bit half
// and add the partial counts.  POPCNT writes the flags, hence KILL cr.
// TEMP dst ensures dst is not allocated on top of src, since dst is
// written before src.hi is read.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: pop-count the two 32-bit words of the long at $mem and
// $mem+4 (little-endian low/high words) and add them.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Same base/index/scale with disp bumped by 4 reaches the high word.
    // NOTE(review): last arg 'false' presumably marks the displacement as
    // not an oop/relocatable — confirm against Address::make_raw.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6417 6460
6418 6461
6419 6462 //----------Load/Store/Move Instructions---------------------------------------
6420 6463 //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
// Sign-extend byte into dst.lo, then fill dst.hi with the sign bit.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX8 the top 25 bits all equal the sign bit, so an
    // arithmetic shift by only 7 still leaves dst.hi all sign bits.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Zero-extend the byte into dst.lo; the high word of the long is zero.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
// The AND result is at most 0xFF (never negative), so zeroing dst.hi
// correctly implements the ConvI2L sign extension.
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6502 6545
// Load Short (16bit signed)
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// (x << 24) >> 24 of a loaded short is just a sign-extended byte load
// of the low byte (little-endian), so emit MOVSX8 directly.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX the top 17 bits all equal the sign bit, so shifting by
    // 15 leaves dst.hi filled with sign bits.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
6547 6590
// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// (x << 24) >> 24 keeps only the sign-extended low byte; emit MOVSX8.
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Zero-extend; the high word of the long is always zero.
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// ushort & 0xFF keeps only the low byte, so a single zero-extending
// byte load (little-endian low byte is at $mem) does the whole job.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
// The AND result is at most 0xFFFF (never negative), so zeroing dst.hi
// is a correct sign extension.
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6622 6665
// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// (x << 24) >> 24 of a loaded int is a sign-extended load of the low
// byte (little-endian).
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// x & 0xFF of a loaded int is a zero-extended load of the low byte.
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// (x << 16) >> 16 is a sign-extended load of the low 16-bit word.
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// x & 0xFFFF is a zero-extended load of the low 16-bit word.
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6684 6727
// Load Integer into Long Register
// ConvI2L: sign-extend the loaded int into the register pair.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // dst.hi becomes all sign bits.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// int & 0xFF is a zero-extended byte load; the result is non-negative,
// so zeroing dst.hi correctly implements the sign extension.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// int & 0xFFFF is a zero-extended 16-bit load; result is non-negative,
// so a zero high word is the correct sign extension.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}
6733 6776
// Load Integer with 32-bit mask into Long Register
// ConvI2L sign-extends its int input.  If the mask has bit 31 set the
// AndI result can be negative, so the high word must be a copy of the
// sign bit — unconditionally zeroing $dst.hi (as the old encoding did)
// produced a wrong, zero-extended long for such masks.  AND first, then
// sign-extend with MOV/SAR 31; for non-negative results SAR yields 0,
// matching the old fast path.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "AND $dst.lo,$mask\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
    __ movl(HIGH_FROM_LOW(Rdst), Rdst); // lo and hi are always different registers.
    __ sarl(HIGH_FROM_LOW(Rdst), 31);   // Fill dst.hi with the sign of the masked value.
  %}
  ins_pipe(ialu_reg_mem);
%}
6750 6793
// Load Unsigned Integer into Long Register
// LoadUI2L zero-extends, so the high word is simply cleared.
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
6767 6810
// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
// Non-atomic path: two 32-bit MOVs from $mem and $mem+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Build the low- and high-word addresses explicitly (disp and disp+4).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit SSE2 move, bounced through a
// temporary XMM register to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into a GPR pair: one 64-bit XMM
// load, then split the halves out with MOVD/PSRLQ/MOVD.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
6825 6868
// Load Range
// Loads an array length; plain 32-bit MOV (opcode 0x8B = MOV r32,r/m32).
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6859 6902
// Load Double
// x87 path (UseSSE<=1): FLD from memory, FSTP into the allocated FPU reg.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) clears the upper half of the XMM register.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) variant: writes only the low 64 bits, leaving the
// upper half of the XMM register untouched.
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 path (UseSSE==0): FLD from memory, FSTP into the FPU register.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}
6917 6960
// Load Aligned Packed Byte to XMM register
// All packed loads below are 64-bit MOVQ loads into the low half of XMM.
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
6967 7010
// Load Effective Address
// Materialize an address expression into a pointer register with LEA
// (opcode 0x8D); one variant per supported addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7018 7061
// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers flags (KILL cr).
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a long constant as two 32-bit immediate moves (lo then hi).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD9, 0x00); /* D9 /0 */
  ins_encode(LdImmF(src), Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXF().
instruct loadConX(regX dst, immXF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXF0().
instruct loadConX0(regX dst, immXF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D ST,$src\n\t"
            "FSTP $dst" %}
  ins_encode(LdImmD(src), Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXD().
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$con]" %}
  ins_encode(load_conXD(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}
7129 7172
// Load Stack Slot
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot as two 32-bit MOVs.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  // NOTE(review): pipe class ialu_mem_long_reg reads store-like for a
  // load — confirm it is the intended pipeline description.
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7188 7231
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// No usable prefetch instruction (no SSE, no 3DNow!): empty encoding.
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCH (0F 0D /0).
// NOTE(review): && binds tighter than ||, so ReadPrefetchInstr==3
// selects this form regardless of 3DNow! support — confirm intended.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// No usable prefetch instruction for writes: empty encoding.
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCHW (0F 0D /1).
// NOTE(review): same precedence question as prefetchr above —
// AllocatePrefetchInstr==3 bypasses the 3DNow! check; confirm intended.
instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7299 7342
//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The 0x66 operand-size prefix ahead of MOV (0x89) makes it a 16-bit store.
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic 64-bit store as two 32-bit MOVs (low word, then high word
// at $mem+4); the atomic case is handled by the volatile store forms.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// ConvL2I keeps only the low 32 bits, so store src.lo directly.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7358 7401
7359 7402 // Volatile Store Long.  Must be atomic, so move it into
7360 7403 // the FP TOS and then do a 64-bit FIST.  Has to probe the
7361 7404 // target address before the store (for null-ptr checks)
7362 7405 // so the memory operand is used twice in the encoding.
// The CMP $mem,EAX (opcode 0x3B) performs the probe; its flag clobber is
// declared via KILL cr.
7363 7406 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7364 7407 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7365 7408 match(Set mem (StoreL mem src));
7366 7409 effect( KILL cr );
7367 7410 ins_cost(400);
7368 7411 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7369 7412 "FILD $src\n\t"
7370 7413 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7371 7414 opcode(0x3B);
7372 7415 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7373 7416 ins_pipe( fpu_reg_mem );
7374 7417 %}
7375 7418
// SSE2 variant for a spilled long (stack slot): a single MOVSD is an atomic
// 64-bit store; tmp is a scratch XMM register.
7376 7419 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7377 7420 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7378 7421 match(Set mem (StoreL mem src));
7379 7422 effect( TEMP tmp, KILL cr );
7380 7423 ins_cost(380);
7381 7424 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7382 7425 "MOVSD $tmp,$src\n\t"
7383 7426 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7384 7427 opcode(0x3B);
7385 7428 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
7386 7429 ins_pipe( pipe_slow );
7387 7430 %}
7388 7431
// SSE2 variant for a long held in a GP register pair: build the 64-bit value
// in an XMM register (two MOVDs + PUNPCKLDQ), then store it with one MOVSD.
// Slightly cheaper (360) than the stack-slot path since no spill is needed.
7389 7432 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7390 7433 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7391 7434 match(Set mem (StoreL mem src));
7392 7435 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7393 7436 ins_cost(360);
7394 7437 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7395 7438 "MOVD $tmp,$src.lo\n\t"
7396 7439 "MOVD $tmp2,$src.hi\n\t"
7397 7440 "PUNPCKLDQ $tmp,$tmp2\n\t"
7398 7441 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7399 7442 opcode(0x3B);
7400 7443 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
7401 7444 ins_pipe( pipe_slow );
7402 7445 %}
7403 7446
7404 7447 // Store Pointer; for storing unknown oops and raw pointers
7405 7448 instruct storeP(memory mem, anyRegP src) %{
7406 7449 match(Set mem (StoreP mem src));
7407 7450
7408 7451 ins_cost(125);
7409 7452 format %{ "MOV $mem,$src" %}
7410 7453 opcode(0x89);
7411 7454 ins_encode( OpcP, RegMem( src, mem ) );
7412 7455 ins_pipe( ialu_mem_reg );
7413 7456 %}
7414 7457
7415 7458 // Store Integer Immediate
7416 7459 instruct storeImmI(memory mem, immI src) %{
7417 7460 match(Set mem (StoreI mem src));
7418 7461
7419 7462 ins_cost(150);
7420 7463 format %{ "MOV $mem,$src" %}
7421 7464 opcode(0xC7); /* C7 /0 */
7422 7465 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7423 7466 ins_pipe( ialu_mem_imm );
7424 7467 %}
7425 7468
7426 7469 // Store Short/Char Immediate
7427 7470 instruct storeImmI16(memory mem, immI16 src) %{
7428 7471 predicate(UseStoreImmI16);
7429 7472 match(Set mem (StoreC mem src));
7430 7473
7431 7474 ins_cost(150);
7432 7475 format %{ "MOV16 $mem,$src" %}
7433 7476 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7434 7477 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
7435 7478 ins_pipe( ialu_mem_imm );
7436 7479 %}
7437 7480
7438 7481 // Store Pointer Immediate; null pointers or constant oops that do not
7439 7482 // need card-mark barriers.
7440 7483 instruct storeImmP(memory mem, immP src) %{
7441 7484 match(Set mem (StoreP mem src));
7442 7485
7443 7486 ins_cost(150);
7444 7487 format %{ "MOV $mem,$src" %}
7445 7488 opcode(0xC7); /* C7 /0 */
7446 7489 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7447 7490 ins_pipe( ialu_mem_imm );
7448 7491 %}
7449 7492
7450 7493 // Store Byte Immediate
7451 7494 instruct storeImmB(memory mem, immI8 src) %{
7452 7495 match(Set mem (StoreB mem src));
7453 7496
7454 7497 ins_cost(150);
7455 7498 format %{ "MOV8 $mem,$src" %}
7456 7499 opcode(0xC6); /* C6 /0 */
7457 7500 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7458 7501 ins_pipe( ialu_mem_imm );
7459 7502 %}
7460 7503
7461 7504 // Store Aligned Packed Byte XMM register to memory
7462 7505 instruct storeA8B(memory mem, regXD src) %{
7463 7506 predicate(UseSSE>=1);
7464 7507 match(Set mem (Store8B mem src));
7465 7508 ins_cost(145);
7466 7509 format %{ "MOVQ $mem,$src\t! packed8B" %}
7467 7510 ins_encode( movq_st(mem, src));
7468 7511 ins_pipe( pipe_slow );
7469 7512 %}
7470 7513
7471 7514 // Store Aligned Packed Char/Short XMM register to memory
7472 7515 instruct storeA4C(memory mem, regXD src) %{
7473 7516 predicate(UseSSE>=1);
7474 7517 match(Set mem (Store4C mem src));
7475 7518 ins_cost(145);
7476 7519 format %{ "MOVQ $mem,$src\t! packed4C" %}
7477 7520 ins_encode( movq_st(mem, src));
7478 7521 ins_pipe( pipe_slow );
7479 7522 %}
7480 7523
7481 7524 // Store Aligned Packed Integer XMM register to memory
7482 7525 instruct storeA2I(memory mem, regXD src) %{
7483 7526 predicate(UseSSE>=1);
7484 7527 match(Set mem (Store2I mem src));
7485 7528 ins_cost(145);
7486 7529 format %{ "MOVQ $mem,$src\t! packed2I" %}
7487 7530 ins_encode( movq_st(mem, src));
7488 7531 ins_pipe( pipe_slow );
7489 7532 %}
7490 7533
7491 7534 // Store CMS card-mark Immediate
7492 7535 instruct storeImmCM(memory mem, immI8 src) %{
7493 7536 match(Set mem (StoreCM mem src));
7494 7537
7495 7538 ins_cost(150);
7496 7539 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7497 7540 opcode(0xC6); /* C6 /0 */
7498 7541 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7499 7542 ins_pipe( ialu_mem_imm );
7500 7543 %}
7501 7544
7502 7545 // Store Double
// x87 path (UseSSE<=1): value comes in on the FPU top-of-stack (regDPR1).
7503 7546 instruct storeD( memory mem, regDPR1 src) %{
7504 7547 predicate(UseSSE<=1);
7505 7548 match(Set mem (StoreD mem src));
7506 7549
7507 7550 ins_cost(100);
7508 7551 format %{ "FST_D $mem,$src" %}
7509 7552 opcode(0xDD); /* DD /2 */
7510 7553 ins_encode( enc_FP_store(mem,src) );
7511 7554 ins_pipe( fpu_mem_reg );
7512 7555 %}
7513 7556
7514 7557 // Store double does rounding on x86
// The x87 FST itself rounds to 64-bit, so the RoundDouble node folds away.
7515 7558 instruct storeD_rounded( memory mem, regDPR1 src) %{
7516 7559 predicate(UseSSE<=1);
7517 7560 match(Set mem (StoreD mem (RoundDouble src)));
7518 7561
7519 7562 ins_cost(100);
7520 7563 format %{ "FST_D $mem,$src\t# round" %}
7521 7564 opcode(0xDD); /* DD /2 */
7522 7565 ins_encode( enc_FP_store(mem,src) );
7523 7566 ins_pipe( fpu_mem_reg );
7524 7567 %}
7525 7568
7526 7569 // Store XMM register to memory (double-precision floating points)
7527 7570 // MOVSD instruction
7528 7571 instruct storeXD(memory mem, regXD src) %{
7529 7572 predicate(UseSSE>=2);
7530 7573 match(Set mem (StoreD mem src));
7531 7574 ins_cost(95);
7532 7575 format %{ "MOVSD $mem,$src" %}
7533 7576 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7534 7577 ins_pipe( pipe_slow );
7535 7578 %}
7536 7579
7537 7580 // Store XMM register to memory (single-precision floating point)
7538 7581 // MOVSS instruction
7539 7582 instruct storeX(memory mem, regX src) %{
7540 7583 predicate(UseSSE>=1);
7541 7584 match(Set mem (StoreF mem src));
7542 7585 ins_cost(95);
7543 7586 format %{ "MOVSS $mem,$src" %}
7544 7587 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7545 7588 ins_pipe( pipe_slow );
7546 7589 %}
7547 7590
7548 7591 // Store Aligned Packed Single Float XMM register to memory
7549 7592 instruct storeA2F(memory mem, regXD src) %{
7550 7593 predicate(UseSSE>=1);
7551 7594 match(Set mem (Store2F mem src));
7552 7595 ins_cost(145);
7553 7596 format %{ "MOVQ $mem,$src\t! packed2F" %}
7554 7597 ins_encode( movq_st(mem, src));
7555 7598 ins_pipe( pipe_slow );
7556 7599 %}
7557 7600
7558 7601 // Store Float
// x87 path (UseSSE==0): float on FPU top-of-stack (regFPR1), D9 /2 = FST m32.
7559 7602 instruct storeF( memory mem, regFPR1 src) %{
7560 7603 predicate(UseSSE==0);
7561 7604 match(Set mem (StoreF mem src));
7562 7605
7563 7606 ins_cost(100);
7564 7607 format %{ "FST_S $mem,$src" %}
7565 7608 opcode(0xD9); /* D9 /2 */
7566 7609 ins_encode( enc_FP_store(mem,src) );
7567 7610 ins_pipe( fpu_mem_reg );
7568 7611 %}
7569 7612
7570 7613 // Store Float does rounding on x86
// The 32-bit FST rounds to single precision, so RoundFloat folds away.
7571 7614 instruct storeF_rounded( memory mem, regFPR1 src) %{
7572 7615 predicate(UseSSE==0);
7573 7616 match(Set mem (StoreF mem (RoundFloat src)));
7574 7617
7575 7618 ins_cost(100);
7576 7619 format %{ "FST_S $mem,$src\t# round" %}
7577 7620 opcode(0xD9); /* D9 /2 */
7578 7621 ins_encode( enc_FP_store(mem,src) );
7579 7622 ins_pipe( fpu_mem_reg );
7580 7623 %}
7581 7624
7582 7625 // Store Float does rounding on x86
// ConvD2F folded into the store: the 32-bit FST narrows the double for free.
7583 7626 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7584 7627 predicate(UseSSE<=1);
7585 7628 match(Set mem (StoreF mem (ConvD2F src)));
7586 7629
7587 7630 ins_cost(100);
7588 7631 format %{ "FST_S $mem,$src\t# D-round" %}
7589 7632 opcode(0xD9); /* D9 /2 */
7590 7633 ins_encode( enc_FP_store(mem,src) );
7591 7634 ins_pipe( fpu_mem_reg );
7592 7635 %}
7593 7636
7594 7637 // Store immediate Float value (it is faster than store from FPU register)
7595 7638 // The instruction usage is guarded by predicate in operand immF().
// Stores the raw IEEE bits of the constant with an integer MOV (C7 /0).
7596 7639 instruct storeF_imm( memory mem, immF src) %{
7597 7640 match(Set mem (StoreF mem src));
7598 7641
7599 7642 ins_cost(50);
7600 7643 format %{ "MOV $mem,$src\t# store float" %}
7601 7644 opcode(0xC7); /* C7 /0 */
7602 7645 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7603 7646 ins_pipe( ialu_mem_imm );
7604 7647 %}
7605 7648
7606 7649 // Store immediate Float value (it is faster than store from XMM register)
7607 7650 // The instruction usage is guarded by predicate in operand immXF().
7608 7651 instruct storeX_imm( memory mem, immXF src) %{
7609 7652 match(Set mem (StoreF mem src));
7610 7653
7611 7654 ins_cost(50);
7612 7655 format %{ "MOV $mem,$src\t# store float" %}
7613 7656 opcode(0xC7); /* C7 /0 */
7614 7657 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7615 7658 ins_pipe( ialu_mem_imm );
7616 7659 %}
7617 7660
// Register-to-stack-slot spills. These match a bare "Set dst src" (register
// copy to a stack slot) rather than an ideal Store node.
7618 7661 // Store Integer to stack slot
7619 7662 instruct storeSSI(stackSlotI dst, eRegI src) %{
7620 7663 match(Set dst src);
7621 7664
7622 7665 ins_cost(100);
7623 7666 format %{ "MOV $dst,$src" %}
7624 7667 opcode(0x89);
7625 7668 ins_encode( OpcPRegSS( dst, src ) );
7626 7669 ins_pipe( ialu_mem_reg );
7627 7670 %}
7628 7671
7629 7672 // Store Integer to stack slot
7630 7673 instruct storeSSP(stackSlotP dst, eRegP src) %{
7631 7674 match(Set dst src);
7632 7675
7633 7676 ins_cost(100);
7634 7677 format %{ "MOV $dst,$src" %}
7635 7678 opcode(0x89);
7636 7679 ins_encode( OpcPRegSS( dst, src ) );
7637 7680 ins_pipe( ialu_mem_reg );
7638 7681 %}
7639 7682
7640 7683 // Store Long to stack slot
// Long spill needs two MOVs: low word, then high word at dst+4.
7641 7684 instruct storeSSL(stackSlotL dst, eRegL src) %{
7642 7685 match(Set dst src);
7643 7686
7644 7687 ins_cost(200);
7645 7688 format %{ "MOV $dst,$src.lo\n\t"
7646 7689 "MOV $dst+4,$src.hi" %}
7647 7690 opcode(0x89, 0x89);
7648 7691 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7649 7692 ins_pipe( ialu_mem_long_reg );
7650 7693 %}
7651 7694
7652 7695 //----------MemBar Instructions-----------------------------------------------
7653 7696 // Memory barrier flavors
// Acquire/release barriers emit no code (size 0); they exist to constrain the
// compiler's instruction scheduling.  NOTE(review): presumably the hardware
// side is covered by x86's ordering guarantees — only MemBarVolatile needs a
// real fence (the LOCK ADDL / StoreLoad below).
7654 7697
7655 7698 instruct membar_acquire() %{
7656 7699 match(MemBarAcquire);
7657 7700 ins_cost(400);
7658 7701
7659 7702 size(0);
7660 7703 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7661 7704 ins_encode();
7662 7705 ins_pipe(empty);
7663 7706 %}
7664 7707
// Zero-cost variant chosen when the acquire immediately follows a FastLock's
// CMPXCHG, which already has the required semantics.
7665 7708 instruct membar_acquire_lock() %{
7666 7709 match(MemBarAcquire);
7667 7710 predicate(Matcher::prior_fast_lock(n));
7668 7711 ins_cost(0);
7669 7712
7670 7713 size(0);
7671 7714 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7672 7715 ins_encode( );
7673 7716 ins_pipe(empty);
7674 7717 %}
7675 7718
7676 7719 instruct membar_release() %{
7677 7720 match(MemBarRelease);
7678 7721 ins_cost(400);
7679 7722
7680 7723 size(0);
7681 7724 format %{ "MEMBAR-release ! (empty encoding)" %}
7682 7725 ins_encode( );
7683 7726 ins_pipe(empty);
7684 7727 %}
7685 7728
// Zero-cost variant when a FastUnlock immediately follows the release.
7686 7729 instruct membar_release_lock() %{
7687 7730 match(MemBarRelease);
7688 7731 predicate(Matcher::post_fast_unlock(n));
7689 7732 ins_cost(0);
7690 7733
7691 7734 size(0);
7692 7735 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7693 7736 ins_encode( );
7694 7737 ins_pipe(empty);
7695 7738 %}
7696 7739
// Full StoreLoad barrier.  On MP the assembler emits LOCK ADDL [ESP],0
// (see format template); on UP it is empty.  KILL cr because ADDL sets flags.
7697 7740 instruct membar_volatile(eFlagsReg cr) %{
7698 7741 match(MemBarVolatile);
7699 7742 effect(KILL cr);
7700 7743 ins_cost(400);
7701 7744
7702 7745 format %{
7703 7746 $$template
7704 7747 if (os::is_MP()) {
7705 7748 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7706 7749 } else {
7707 7750 $$emit$$"MEMBAR-volatile ! (empty encoding)"
7708 7751 }
7709 7752 %}
7710 7753 ins_encode %{
7711 7754 __ membar(Assembler::StoreLoad);
7712 7755 %}
7713 7756 ins_pipe(pipe_slow);
7714 7757 %}
7715 7758
// Elided when the matcher proves a prior instruction already provides the
// StoreLoad barrier (e.g. a locked instruction).
7716 7759 instruct unnecessary_membar_volatile() %{
7717 7760 match(MemBarVolatile);
7718 7761 predicate(Matcher::post_store_load_barrier(n));
7719 7762 ins_cost(0);
7720 7763
7721 7764 size(0);
7722 7765 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7723 7766 ins_encode( );
7724 7767 ins_pipe(empty);
7725 7768 %}
7726 7769
7727 7770 //----------Move Instructions--------------------------------------------------
// CastX2P pins src and dst to the same register (EAX), so no code is emitted.
7728 7771 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7729 7772 match(Set dst (CastX2P src));
7730 7773 format %{ "# X2P $dst, $src" %}
7731 7774 ins_encode( /*empty encoding*/ );
7732 7775 ins_cost(0);
7733 7776 ins_pipe(empty);
7734 7777 %}
7735 7778
// CastP2X may land in a different register, so it is a plain register copy.
7736 7779 instruct castP2X(eRegI dst, eRegP src ) %{
7737 7780 match(Set dst (CastP2X src));
7738 7781 ins_cost(50);
7739 7782 format %{ "MOV $dst, $src\t# CastP2X" %}
7740 7783 ins_encode( enc_Copy( dst, src) );
7741 7784 ins_pipe( ialu_reg_reg );
7742 7785 %}
7743 7786
7744 7787 //----------Conditional Move---------------------------------------------------
7745 7788 // Conditional move
// CMOVcc is 0F 40+cc; enc_cmov presumably folds the condition code into the
// secondary opcode byte.  All CMOV forms require supports_cmov() (P6+).
7746 7789 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7747 7790 predicate(VM_Version::supports_cmov() );
7748 7791 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7749 7792 ins_cost(200);
7750 7793 format %{ "CMOV$cop $dst,$src" %}
7751 7794 opcode(0x0F,0x40);
7752 7795 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7753 7796 ins_pipe( pipe_cmov_reg );
7754 7797 %}
7755 7798
// Unsigned-flags variant (eFlagsRegU / cmpOpU); encoding is identical.
7756 7799 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7757 7800 predicate(VM_Version::supports_cmov() );
7758 7801 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7759 7802 ins_cost(200);
7760 7803 format %{ "CMOV$cop $dst,$src" %}
7761 7804 opcode(0x0F,0x40);
7762 7805 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7763 7806 ins_pipe( pipe_cmov_reg );
7764 7807 %}
7765 7808
// UCF (unordered-compare-flags) variants just expand to the U form.
7766 7809 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7767 7810 predicate(VM_Version::supports_cmov() );
7768 7811 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7769 7812 ins_cost(200);
7770 7813 expand %{
7771 7814 cmovI_regU(cop, cr, dst, src);
7772 7815 %}
7773 7816 %}
7774 7817
7775 7818 // Conditional move
7776 7819 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7777 7820 predicate(VM_Version::supports_cmov() );
7778 7821 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7779 7822 ins_cost(250);
7780 7823 format %{ "CMOV$cop $dst,$src" %}
7781 7824 opcode(0x0F,0x40);
7782 7825 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7783 7826 ins_pipe( pipe_cmov_mem );
7784 7827 %}
7785 7828
7786 7829 // Conditional move
7787 7830 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7788 7831 predicate(VM_Version::supports_cmov() );
7789 7832 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7790 7833 ins_cost(250);
7791 7834 format %{ "CMOV$cop $dst,$src" %}
7792 7835 opcode(0x0F,0x40);
7793 7836 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7794 7837 ins_pipe( pipe_cmov_mem );
7795 7838 %}
7796 7839
7797 7840 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7798 7841 predicate(VM_Version::supports_cmov() );
7799 7842 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7800 7843 ins_cost(250);
7801 7844 expand %{
7802 7845 cmovI_memU(cop, cr, dst, src);
7803 7846 %}
7804 7847 %}
7805 7848
7806 7849 // Conditional move
7807 7850 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7808 7851 predicate(VM_Version::supports_cmov() );
7809 7852 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7810 7853 ins_cost(200);
7811 7854 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7812 7855 opcode(0x0F,0x40);
7813 7856 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7814 7857 ins_pipe( pipe_cmov_reg );
7815 7858 %}
7816 7859
7817 7860 // Conditional move (non-P6 version)
7818 7861 // Note:  a CMoveP is generated for  stubs and native wrappers
7819 7862 //        regardless of whether we are on a P6, so we
7820 7863 //        emulate a cmov here
// Emulation: an inverted-sense short branch over a plain MOV (0x8b).
7821 7864 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7822 7865 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7823 7866 ins_cost(300);
7824 7867 format %{ "Jn$cop skip\n\t"
7825 7868 "MOV $dst,$src\t# pointer\n"
7826 7869 "skip:" %}
7827 7870 opcode(0x8b);
7828 7871 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7829 7872 ins_pipe( pipe_cmov_reg );
7830 7873 %}
7831 7874
7832 7875 // Conditional move
7833 7876 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7834 7877 predicate(VM_Version::supports_cmov() );
7835 7878 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7836 7879 ins_cost(200);
7837 7880 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7838 7881 opcode(0x0F,0x40);
7839 7882 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7840 7883 ins_pipe( pipe_cmov_reg );
7841 7884 %}
7842 7885
7843 7886 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7844 7887 predicate(VM_Version::supports_cmov() );
7845 7888 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7846 7889 ins_cost(200);
7847 7890 expand %{
7848 7891 cmovP_regU(cop, cr, dst, src);
7849 7892 %}
7850 7893 %}
7851 7894
7852 7895 // DISABLED: Requires the ADLC to emit a bottom_type call that
7853 7896 // correctly meets the two pointer arguments; one is an incoming
7854 7897 // register but the other is a memory operand.  ALSO appears to
7855 7898 // be buggy with implicit null checks.
7856 7899 //
7857 7900 //// Conditional move
7858 7901 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
7859 7902 //  predicate(VM_Version::supports_cmov() );
7860 7903 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7861 7904 //  ins_cost(250);
7862 7905 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7863 7906 //  opcode(0x0F,0x40);
7864 7907 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7865 7908 //  ins_pipe( pipe_cmov_mem );
7866 7909 //%}
7867 7910 //
7868 7911 //// Conditional move
7869 7912 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7870 7913 //  predicate(VM_Version::supports_cmov() );
7871 7914 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7872 7915 //  ins_cost(250);
7873 7916 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7874 7917 //  opcode(0x0F,0x40);
7875 7918 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7876 7919 //  ins_pipe( pipe_cmov_mem );
7877 7920 //%}
7878 7921
7879 7922 // Conditional move
// x87 FCMOVcc (opcode base 0xDA) — only valid with unsigned flag conditions,
// hence the eFlagsRegU/cmpOp_fcmov operands; dst must be FPU TOS (regDPR1).
7880 7923 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
7881 7924 predicate(UseSSE<=1);
7882 7925 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7883 7926 ins_cost(200);
7884 7927 format %{ "FCMOV$cop $dst,$src\t# double" %}
7885 7928 opcode(0xDA);
7886 7929 ins_encode( enc_cmov_d(cop,src) );
7887 7930 ins_pipe( pipe_cmovD_reg );
7888 7931 %}
7889 7932
7890 7933 // Conditional move
7891 7934 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
7892 7935 predicate(UseSSE==0);
7893 7936 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7894 7937 ins_cost(200);
7895 7938 format %{ "FCMOV$cop $dst,$src\t# float" %}
7896 7939 opcode(0xDA);
7897 7940 ins_encode( enc_cmov_d(cop,src) );
7898 7941 ins_pipe( pipe_cmovD_reg );
7899 7942 %}
7900 7943
7901 7944 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-condition fallback: inverted-sense branch over an x87 push/store.
7902 7945 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7903 7946 predicate(UseSSE<=1);
7904 7947 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7905 7948 ins_cost(200);
7906 7949 format %{ "Jn$cop skip\n\t"
7907 7950 "MOV $dst,$src\t# double\n"
7908 7951 "skip:" %}
7909 7952 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7910 7953 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
7911 7954 ins_pipe( pipe_cmovD_reg );
7912 7955 %}
7913 7956
7914 7957 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7915 7958 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7916 7959 predicate(UseSSE==0);
7917 7960 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7918 7961 ins_cost(200);
7919 7962 format %{ "Jn$cop skip\n\t"
7920 7963 "MOV $dst,$src\t# float\n"
7921 7964 "skip:" %}
7922 7965 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
7923 7966 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
7924 7967 ins_pipe( pipe_cmovD_reg );
7925 7968 %}
7926 7969
7927 7970 // No CMOVE with SSE/SSE2
// SSE has no conditional move, so emit a short branch over a MOVSS.  The
// condition sense is inverted (cmpcode^1) to branch *around* the move.
7928 7971 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
7929 7972 predicate (UseSSE>=1);
7930 7973 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7931 7974 ins_cost(200);
7932 7975 format %{ "Jn$cop skip\n\t"
7933 7976 "MOVSS $dst,$src\t# float\n"
7934 7977 "skip:" %}
7935 7978 ins_encode %{
7936 7979 Label skip;
7937 7980 // Invert sense of branch from sense of CMOV
7938 7981 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7939 7982 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7940 7983 __ bind(skip);
7941 7984 %}
7942 7985 ins_pipe( pipe_slow );
7943 7986 %}
7944 7987
7945 7988 // No CMOVE with SSE/SSE2
7946 7989 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
7947 7990 predicate (UseSSE>=2);
7948 7991 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7949 7992 ins_cost(200);
7950 7993 format %{ "Jn$cop skip\n\t"
7951 7994 "MOVSD $dst,$src\t# float\n"
7952 7995 "skip:" %}
7953 7996 ins_encode %{
7954 7997 Label skip;
7955 7998 // Invert sense of branch from sense of CMOV
7956 7999 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7957 8000 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7958 8001 __ bind(skip);
7959 8002 %}
7960 8003 ins_pipe( pipe_slow );
7961 8004 %}
7962 8005
7963 8006 // unsigned version
7964 8007 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
7965 8008 predicate (UseSSE>=1);
7966 8009 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7967 8010 ins_cost(200);
7968 8011 format %{ "Jn$cop skip\n\t"
7969 8012 "MOVSS $dst,$src\t# float\n"
7970 8013 "skip:" %}
7971 8014 ins_encode %{
7972 8015 Label skip;
7973 8016 // Invert sense of branch from sense of CMOV
7974 8017 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7975 8018 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7976 8019 __ bind(skip);
7977 8020 %}
7978 8021 ins_pipe( pipe_slow );
7979 8022 %}
7980 8023
7981 8024 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
7982 8025 predicate (UseSSE>=1);
7983 8026 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7984 8027 ins_cost(200);
7985 8028 expand %{
7986 8029 fcmovX_regU(cop, cr, dst, src);
7987 8030 %}
7988 8031 %}
7989 8032
7990 8033 // unsigned version
7991 8034 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
7992 8035 predicate (UseSSE>=2);
7993 8036 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7994 8037 ins_cost(200);
7995 8038 format %{ "Jn$cop skip\n\t"
7996 8039 "MOVSD $dst,$src\t# float\n"
7997 8040 "skip:" %}
7998 8041 ins_encode %{
7999 8042 Label skip;
8000 8043 // Invert sense of branch from sense of CMOV
8001 8044 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8002 8045 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8003 8046 __ bind(skip);
8004 8047 %}
8005 8048 ins_pipe( pipe_slow );
8006 8049 %}
8007 8050
8008 8051 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8009 8052 predicate (UseSSE>=2);
8010 8053 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8011 8054 ins_cost(200);
8012 8055 expand %{
8013 8056 fcmovXD_regU(cop, cr, dst, src);
8014 8057 %}
8015 8058 %}
8016 8059
// Long CMOV: the 64-bit value lives in a register pair, so two CMOVs are
// emitted (low then high halves) under the same condition.
8017 8060 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8018 8061 predicate(VM_Version::supports_cmov() );
8019 8062 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8020 8063 ins_cost(200);
8021 8064 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8022 8065 "CMOV$cop $dst.hi,$src.hi" %}
8023 8066 opcode(0x0F,0x40);
8024 8067 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8025 8068 ins_pipe( pipe_cmov_reg_long );
8026 8069 %}
8027 8070
8028 8071 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8029 8072 predicate(VM_Version::supports_cmov() );
8030 8073 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8031 8074 ins_cost(200);
8032 8075 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8033 8076 "CMOV$cop $dst.hi,$src.hi" %}
8034 8077 opcode(0x0F,0x40);
8035 8078 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8036 8079 ins_pipe( pipe_cmov_reg_long );
8037 8080 %}
8038 8081
8039 8082 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8040 8083 predicate(VM_Version::supports_cmov() );
8041 8084 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8042 8085 ins_cost(200);
8043 8086 expand %{
8044 8087 cmovL_regU(cop, cr, dst, src);
8045 8088 %}
8046 8089 %}
8047 8090
8048 8091 //----------Arithmetic Instructions--------------------------------------------
8049 8092 //----------Addition Instructions----------------------------------------------
8050 8093 // Integer Addition Instructions
8051 8094 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8052 8095 match(Set dst (AddI dst src));
8053 8096 effect(KILL cr);
8054 8097
8055 8098 size(2);
8056 8099 format %{ "ADD $dst,$src" %}
8057 8100 opcode(0x03);
8058 8101 ins_encode( OpcP, RegReg( dst, src) );
8059 8102 ins_pipe( ialu_reg_reg );
8060 8103 %}
8061 8104
// OpcSErm presumably picks the sign-extended 8-bit immediate form (0x83)
// when the constant fits, otherwise the full 32-bit form (0x81 /0).
8062 8105 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8063 8106 match(Set dst (AddI dst src));
8064 8107 effect(KILL cr);
8065 8108
8066 8109 format %{ "ADD $dst,$src" %}
8067 8110 opcode(0x81, 0x00); /* /0 id */
8068 8111 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8069 8112 ins_pipe( ialu_reg );
8070 8113 %}
8071 8114
// +1 becomes the one-byte INC (0x40+reg) when UseIncDec allows it.
8072 8115 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8073 8116 predicate(UseIncDec);
8074 8117 match(Set dst (AddI dst src));
8075 8118 effect(KILL cr);
8076 8119
8077 8120 size(1);
8078 8121 format %{ "INC $dst" %}
8079 8122 opcode(0x40); /*  */
8080 8123 ins_encode( Opc_plus( primary, dst ) );
8081 8124 ins_pipe( ialu_reg );
8082 8125 %}
8083 8126
// Three-operand add via LEA — no flags clobbered, so no KILL cr effect.
8084 8127 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8085 8128 match(Set dst (AddI src0 src1));
8086 8129 ins_cost(110);
8087 8130
8088 8131 format %{ "LEA $dst,[$src0 + $src1]" %}
8089 8132 opcode(0x8D); /* 0x8D /r */
8090 8133 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8091 8134 ins_pipe( ialu_reg_reg );
8092 8135 %}
8093 8136
8094 8137 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8095 8138 match(Set dst (AddP src0 src1));
8096 8139 ins_cost(110);
8097 8140
8098 8141 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8099 8142 opcode(0x8D); /* 0x8D /r */
8100 8143 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8101 8144 ins_pipe( ialu_reg_reg );
8102 8145 %}
8103 8146
// Adding -1 becomes the one-byte DEC (0x48+reg).
8104 8147 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8105 8148 predicate(UseIncDec);
8106 8149 match(Set dst (AddI dst src));
8107 8150 effect(KILL cr);
8108 8151
8109 8152 size(1);
8110 8153 format %{ "DEC $dst" %}
8111 8154 opcode(0x48); /* */
8112 8155 ins_encode( Opc_plus( primary, dst ) );
8113 8156 ins_pipe( ialu_reg );
8114 8157 %}
8115 8158
8116 8159 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8117 8160 match(Set dst (AddP dst src));
8118 8161 effect(KILL cr);
8119 8162
8120 8163 size(2);
8121 8164 format %{ "ADD $dst,$src" %}
8122 8165 opcode(0x03);
8123 8166 ins_encode( OpcP, RegReg( dst, src) );
8124 8167 ins_pipe( ialu_reg_reg );
8125 8168 %}
8126 8169
8127 8170 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8128 8171 match(Set dst (AddP dst src));
8129 8172 effect(KILL cr);
8130 8173
8131 8174 format %{ "ADD $dst,$src" %}
8132 8175 opcode(0x81,0x00); /* Opcode 81 /0 id */
8133 8176 // ins_encode( RegImm( dst, src) );
8134 8177 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8135 8178 ins_pipe( ialu_reg );
8136 8179 %}
8137 8180
// reg += mem: ADD r32, r/m32 (0x03 /r).
8138 8181 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8139 8182 match(Set dst (AddI dst (LoadI src)));
8140 8183 effect(KILL cr);
8141 8184
8142 8185 ins_cost(125);
8143 8186 format %{ "ADD $dst,$src" %}
8144 8187 opcode(0x03);
8145 8188 ins_encode( OpcP, RegMem( dst, src) );
8146 8189 ins_pipe( ialu_reg_mem );
8147 8190 %}
8148 8191
// mem += reg: read-modify-write ADD r/m32, r32 (0x01 /r); the Load/Store pair
// on the same address folds into one instruction.
8149 8192 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8150 8193 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8151 8194 effect(KILL cr);
8152 8195
8153 8196 ins_cost(150);
8154 8197 format %{ "ADD $dst,$src" %}
8155 8198 opcode(0x01); /* Opcode 01 /r */
8156 8199 ins_encode( OpcP, RegMem( src, dst ) );
8157 8200 ins_pipe( ialu_mem_reg );
8158 8201 %}
8159 8202
8160 8203 // Add Memory with Immediate
8161 8204 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8162 8205 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8163 8206 effect(KILL cr);
8164 8207
8165 8208 ins_cost(125);
8166 8209 format %{ "ADD $dst,$src" %}
8167 8210 opcode(0x81); /* Opcode 81 /0 id */
8168 8211 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8169 8212 ins_pipe( ialu_mem_imm );
8170 8213 %}
8171 8214
// mem += 1 / mem -= 1: FF /0 (INC) and FF /1 (DEC) memory forms.
8172 8215 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8173 8216 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8174 8217 effect(KILL cr);
8175 8218
8176 8219 ins_cost(125);
8177 8220 format %{ "INC $dst" %}
8178 8221 opcode(0xFF); /* Opcode FF /0 */
8179 8222 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8180 8223 ins_pipe( ialu_mem_imm );
8181 8224 %}
8182 8225
8183 8226 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8184 8227 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8185 8228 effect(KILL cr);
8186 8229
8187 8230 ins_cost(125);
8188 8231 format %{ "DEC $dst" %}
8189 8232 opcode(0xFF); /* Opcode FF /1 */
8190 8233 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8191 8234 ins_pipe( ialu_mem_imm );
8192 8235 %}
8193 8236
8194 8237
// CheckCastPP is a compiler-internal type refinement; it generates no code
// (size(0), empty encoding) -- the value stays in the same register.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: pointer-type cast, also a no-op at the machine level.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: integer range-narrowing cast, a zero-cost no-op.
instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
8218 8261
8219 8262
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);      // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// Pre-SSE2 path: the x87 FILD/FISTP pair gives an atomic 64-bit transfer.
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: atomic 64-bit load through an XMM temporary, result left
// in a stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path: atomic 64-bit load through an XMM temporary, result split
// into a GPR pair (dst.lo / dst.hi) via MOVD + PSRLQ.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
8266 8309
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r = CMPXCHG r/m32,r32; lock_prefix makes it atomic on MP.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);   // CMPXCHG overwrites EAX on failure
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);   // CMPXCHG8B overwrites EDX:EAX on failure
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();           // lock prefix only needed on multiprocessor
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
8311 8354
8312 8355 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8313 8356
// 64-bit CAS via LOCK CMPXCHG8B; oldval is pinned to EDX:EAX and newval to
// ECX:EBX by the register classes. res is materialized from ZF afterwards.
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);   // EDX:EAX receives the old memory value on failure
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via LOCK CMPXCHG; oldval pinned to EAX, newval to ECX.
instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Integer CAS; same encoding shape as the pointer form above.
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
8350 8393
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
// Subtract Register with Register.
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);      // SUB r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract Register with Immediate (81 /5, or sign-extended 83 /5 via OpcSErm).
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract Register with a Memory operand.
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract Register from Memory (read-modify-write form).
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches AddP of a negated integer (AddP dst (SubI 0 src)) as a plain SUB.
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: matches (SubI 0 dst) as a single NEG instruction.
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8419 8462
8420 8463
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);   // 0F AF /r = IMUL r32,r/m32 (two-byte opcode)
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand form: dst = src * imm (69 /r id, or 6B /r ib via OpcSE).
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Helper: load a 32-bit constant into EAX only (low half of EDX:EAX),
// used as the constant operand of the mulI_imm_high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);   // MOV EAX,imm32
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree to require that the long constant
// fits in a signed 32-bit value (between min_jint and max_jint).
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// General case: shift count in 32..63, so an extra SAR of EDX is needed.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply (both operands masked to 32 bits).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8574 8617
// Integer DIV with Register
// The expansion guards against the minint / -1 overflow case: IDIV traps on
// 0x80000000 / -1, so that case is special-cased to yield quotient minint,
// remainder 0 (EDX cleared) without executing IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);   // IDIV leaves the remainder in EDX
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No 64-bit divide instruction on x86_32; call out to the runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Single IDIV produces quotient in EAX and remainder in EDX; same
// minint / -1 guard as divI_eReg above.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// NOTE(review): unlike divI_eReg, this form has no explicit minint/-1 guard
// in its format -- presumably cdq_enc covers it; confirm against the
// cdq_enc definition earlier in the file.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);   // quotient in EAX is discarded

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// As with divL_eReg, 64-bit remainder goes through a runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8656 8699
// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable shift count must live in CL (hence the eCXRegI operand).
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, memory operand (read-modify-write)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipe class is ialu_mem_imm although this is a
  // register-only form -- looks copy-pasted from sarI_mem_imm; confirm
  // whether ialu_reg was intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8814 8857
8815 8858
//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);      // AND r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);      // OR r32,r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an integer (CastP2X).
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8953 8996
// ROL/ROR
// The *_imm1/_imm8/_CL instructs below are match-less expand templates;
// the rolI_*/rorI_* rules that follow match the OrI-of-two-shifts rotate
// idiom and expand into them.
// ROL expand
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL; dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 (a true rotate).
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// Same rotate with the complementary count written as (32 - shift).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate right: count in CL; dst must avoid ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}
9058 9101
9059 9102 // ROR 32bit by immI8 once
// Mirror of rolI_eReg_i8: recognize (x >>> R) | (x << L) as rotate-right
// only when R + L == 32 (checked modulo 32 via the & 0x1f predicate).
9060 9103 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9061 9104   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9062 9105   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9063 9106 
9064 9107   expand %{
9065 9108     rorI_eReg_imm8(dst, rshift, cr);
9066 9109   %}
9067 9110 %}
9068 9111
9069 9112 // ROR 32bit var by var once
9070 9113 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9071 9114 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9072 9115
9073 9116 expand %{
9074 9117 rorI_eReg_CL(dst, shift, cr);
9075 9118 %}
9076 9119 %}
9077 9120
9078 9121 // ROR 32bit var by var once
9079 9122 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9080 9123 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9081 9124
9082 9125 expand %{
9083 9126 rorI_eReg_CL(dst, shift, cr);
9084 9127 %}
9085 9128 %}
9086 9129
9087 9130 // Xor Instructions
9088 9131 // Xor Register with Register
9089 9132 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9090 9133 match(Set dst (XorI dst src));
9091 9134 effect(KILL cr);
9092 9135
9093 9136 size(2);
9094 9137 format %{ "XOR $dst,$src" %}
9095 9138 opcode(0x33);
9096 9139 ins_encode( OpcP, RegReg( dst, src) );
9097 9140 ins_pipe( ialu_reg_reg );
9098 9141 %}
9099 9142
9100 9143 // Xor Register with Immediate -1
// x ^ -1 is one's complement: encode as the 2-byte NOT r32 instead of a
// 6-byte XOR r32,imm32.  NOT does not modify EFLAGS, so no KILL cr is
// needed (unlike the other XorI rules).
9101 9144 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9102 9145   match(Set dst (XorI dst imm));  
9103 9146 
9104 9147   size(2);
9105 9148   format %{ "NOT    $dst" %}  
9106 9149   ins_encode %{
9107 9150      __ notl($dst$$Register);
9108 9151   %}
9109 9152   ins_pipe( ialu_reg );
9110 9153 %}
9111 9154
9112 9155 // Xor Register with Immediate
9113 9156 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9114 9157 match(Set dst (XorI dst src));
9115 9158 effect(KILL cr);
9116 9159
9117 9160 format %{ "XOR $dst,$src" %}
9118 9161 opcode(0x81,0x06); /* Opcode 81 /6 id */
9119 9162 // ins_encode( RegImm( dst, src) );
9120 9163 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9121 9164 ins_pipe( ialu_reg );
9122 9165 %}
9123 9166
9124 9167 // Xor Register with Memory
9125 9168 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9126 9169 match(Set dst (XorI dst (LoadI src)));
9127 9170 effect(KILL cr);
9128 9171
9129 9172 ins_cost(125);
9130 9173 format %{ "XOR $dst,$src" %}
9131 9174 opcode(0x33);
9132 9175 ins_encode( OpcP, RegMem(dst, src) );
9133 9176 ins_pipe( ialu_reg_mem );
9134 9177 %}
9135 9178
9136 9179 // Xor Memory with Register
9137 9180 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9138 9181 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9139 9182 effect(KILL cr);
9140 9183
9141 9184 ins_cost(150);
9142 9185 format %{ "XOR $dst,$src" %}
9143 9186 opcode(0x31); /* Opcode 31 /r */
9144 9187 ins_encode( OpcP, RegMem( src, dst ) );
9145 9188 ins_pipe( ialu_mem_reg );
9146 9189 %}
9147 9190
9148 9191 // Xor Memory with Immediate
9149 9192 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9150 9193 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9151 9194 effect(KILL cr);
9152 9195
9153 9196 ins_cost(125);
9154 9197 format %{ "XOR $dst,$src" %}
9155 9198 opcode(0x81,0x6); /* Opcode 81 /6 id */
9156 9199 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9157 9200 ins_pipe( ialu_mem_imm );
9158 9201 %}
9159 9202
9160 9203 //----------Convert Int to Boolean---------------------------------------------
9161 9204
9162 9205 instruct movI_nocopy(eRegI dst, eRegI src) %{
9163 9206 effect( DEF dst, USE src );
9164 9207 format %{ "MOV $dst,$src" %}
9165 9208 ins_encode( enc_Copy( dst, src) );
9166 9209 ins_pipe( ialu_reg_reg );
9167 9210 %}
9168 9211
9169 9212 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9170 9213 effect( USE_DEF dst, USE src, KILL cr );
9171 9214
9172 9215 size(4);
9173 9216 format %{ "NEG $dst\n\t"
9174 9217 "ADC $dst,$src" %}
9175 9218 ins_encode( neg_reg(dst),
9176 9219 OpcRegReg(0x13,dst,src) );
9177 9220 ins_pipe( ialu_reg_reg_long );
9178 9221 %}
9179 9222
9180 9223 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9181 9224 match(Set dst (Conv2B src));
9182 9225
9183 9226 expand %{
9184 9227 movI_nocopy(dst,src);
9185 9228 ci2b(dst,src,cr);
9186 9229 %}
9187 9230 %}
9188 9231
9189 9232 instruct movP_nocopy(eRegI dst, eRegP src) %{
9190 9233 effect( DEF dst, USE src );
9191 9234 format %{ "MOV $dst,$src" %}
9192 9235 ins_encode( enc_Copy( dst, src) );
9193 9236 ins_pipe( ialu_reg_reg );
9194 9237 %}
9195 9238
9196 9239 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9197 9240 effect( USE_DEF dst, USE src, KILL cr );
9198 9241 format %{ "NEG $dst\n\t"
9199 9242 "ADC $dst,$src" %}
9200 9243 ins_encode( neg_reg(dst),
9201 9244 OpcRegReg(0x13,dst,src) );
9202 9245 ins_pipe( ialu_reg_reg_long );
9203 9246 %}
9204 9247
9205 9248 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9206 9249 match(Set dst (Conv2B src));
9207 9250
9208 9251 expand %{
9209 9252 movP_nocopy(dst,src);
9210 9253 cp2b(dst,src,cr);
9211 9254 %}
9212 9255 %}
9213 9256
// Produce an all-ones/all-zeros mask: dst = (p < q) ? -1 : 0, branch-free.
// XOR pre-clears dst (SETlt writes only the low byte), CMP sets flags,
// SETlt makes dst 0/1, and NEG turns 1 into -1.
9214 9257 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9215 9258   match(Set dst (CmpLTMask p q));
9216 9259   effect( KILL cr );
9217 9260   ins_cost(400);
9218 9261 
9219 9262   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9220 9263   format %{ "XOR    $dst,$dst\n\t"
9221 9264             "CMP    $p,$q\n\t"
9222 9265             "SETlt  $dst\n\t"
9223 9266             "NEG    $dst" %}
9224 9267   ins_encode( OpcRegReg(0x33,dst,dst),
9225 9268               OpcRegReg(0x3B,p,q),
9226 9269               setLT_reg(dst), neg_reg(dst) );
9227 9270   ins_pipe( pipe_slow );
9228 9271 %}
9229 9272
// Special case mask-vs-zero: (dst < 0) ? -1 : 0 is just an arithmetic
// shift right by 31, which smears the sign bit across the whole register.
9230 9273 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9231 9274   match(Set dst (CmpLTMask dst zero));
9232 9275   effect( DEF dst, KILL cr );
9233 9276   ins_cost(100);
9234 9277 
9235 9278   format %{ "SAR    $dst,31" %}
9236 9279   opcode(0xC1, 0x7);  /* C1 /7 ib */
9237 9280   ins_encode( RegOpcImm( dst, 0x1F ) );
9238 9281   ins_pipe( ialu_reg );
9239 9282 %}
9240 9283
9241 9284
// Branch-free conditional add: p = (p - q) + ((p < q) ? y : 0).
// SUB sets CF when p < q; SBB ECX,ECX then yields -1 or 0, which ANDed
// with y selects y or 0.  The scratch mask lives in ECX (eCXRegI tmp),
// so p, q and y must all avoid ECX (ncxRegI).
9242 9285 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9243 9286   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9244 9287   effect( KILL tmp, KILL cr );
9245 9288   ins_cost(400);
9246 9289   // annoyingly, $tmp has no edges so you cant ask for it in
9247 9290   // any format or encoding
9248 9291   format %{ "SUB    $p,$q\n\t"
9249 9292             "SBB    ECX,ECX\n\t"
9250 9293             "AND    ECX,$y\n\t"
9251 9294             "ADD    $p,ECX" %}
9252 9295   ins_encode( enc_cmpLTP(p,q,y,tmp) );
9253 9296   ins_pipe( pipe_cmplt );
9254 9297 %}
9255 9298
9256 9299 /* If I enable this, I encourage spilling in the inner loop of compress.
9257 9300 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9258 9301 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9259 9302 effect( USE_KILL tmp, KILL cr );
9260 9303 ins_cost(400);
9261 9304
9262 9305 format %{ "SUB $p,$q\n\t"
9263 9306 "SBB ECX,ECX\n\t"
9264 9307 "AND ECX,$y\n\t"
9265 9308 "ADD $p,ECX" %}
9266 9309 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9267 9310 %}
9268 9311 */
9269 9312
9270 9313 //----------Long Instructions------------------------------------------------
9271 9314 // Add Long Register with Register
9272 9315 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9273 9316 match(Set dst (AddL dst src));
9274 9317 effect(KILL cr);
9275 9318 ins_cost(200);
9276 9319 format %{ "ADD $dst.lo,$src.lo\n\t"
9277 9320 "ADC $dst.hi,$src.hi" %}
9278 9321 opcode(0x03, 0x13);
9279 9322 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9280 9323 ins_pipe( ialu_reg_reg_long );
9281 9324 %}
9282 9325
9283 9326 // Add Long Register with Immediate
9284 9327 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9285 9328 match(Set dst (AddL dst src));
9286 9329 effect(KILL cr);
9287 9330 format %{ "ADD $dst.lo,$src.lo\n\t"
9288 9331 "ADC $dst.hi,$src.hi" %}
9289 9332 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9290 9333 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9291 9334 ins_pipe( ialu_reg_long );
9292 9335 %}
9293 9336
9294 9337 // Add Long Register with Memory
9295 9338 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9296 9339 match(Set dst (AddL dst (LoadL mem)));
9297 9340 effect(KILL cr);
9298 9341 ins_cost(125);
9299 9342 format %{ "ADD $dst.lo,$mem\n\t"
9300 9343 "ADC $dst.hi,$mem+4" %}
9301 9344 opcode(0x03, 0x13);
9302 9345 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9303 9346 ins_pipe( ialu_reg_long_mem );
9304 9347 %}
9305 9348
9306 9349 // Subtract Long Register with Register.
9307 9350 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9308 9351 match(Set dst (SubL dst src));
9309 9352 effect(KILL cr);
9310 9353 ins_cost(200);
9311 9354 format %{ "SUB $dst.lo,$src.lo\n\t"
9312 9355 "SBB $dst.hi,$src.hi" %}
9313 9356 opcode(0x2B, 0x1B);
9314 9357 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9315 9358 ins_pipe( ialu_reg_reg_long );
9316 9359 %}
9317 9360
9318 9361 // Subtract Long Register with Immediate
9319 9362 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9320 9363 match(Set dst (SubL dst src));
9321 9364 effect(KILL cr);
9322 9365 format %{ "SUB $dst.lo,$src.lo\n\t"
9323 9366 "SBB $dst.hi,$src.hi" %}
9324 9367 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9325 9368 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9326 9369 ins_pipe( ialu_reg_long );
9327 9370 %}
9328 9371
9329 9372 // Subtract Long Register with Memory
9330 9373 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9331 9374 match(Set dst (SubL dst (LoadL mem)));
9332 9375 effect(KILL cr);
9333 9376 ins_cost(125);
9334 9377 format %{ "SUB $dst.lo,$mem\n\t"
9335 9378 "SBB $dst.hi,$mem+4" %}
9336 9379 opcode(0x2B, 0x1B);
9337 9380 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9338 9381 ins_pipe( ialu_reg_long_mem );
9339 9382 %}
9340 9383
9341 9384 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9342 9385 match(Set dst (SubL zero dst));
9343 9386 effect(KILL cr);
9344 9387 ins_cost(300);
9345 9388 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9346 9389 ins_encode( neg_long(dst) );
9347 9390 ins_pipe( ialu_reg_reg_long );
9348 9391 %}
9349 9392
9350 9393 // And Long Register with Register
9351 9394 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9352 9395 match(Set dst (AndL dst src));
9353 9396 effect(KILL cr);
9354 9397 format %{ "AND $dst.lo,$src.lo\n\t"
9355 9398 "AND $dst.hi,$src.hi" %}
9356 9399 opcode(0x23,0x23);
9357 9400 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9358 9401 ins_pipe( ialu_reg_reg_long );
9359 9402 %}
9360 9403
9361 9404 // And Long Register with Immediate
9362 9405 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9363 9406 match(Set dst (AndL dst src));
9364 9407 effect(KILL cr);
9365 9408 format %{ "AND $dst.lo,$src.lo\n\t"
9366 9409 "AND $dst.hi,$src.hi" %}
9367 9410 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9368 9411 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9369 9412 ins_pipe( ialu_reg_long );
9370 9413 %}
9371 9414
9372 9415 // And Long Register with Memory
9373 9416 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9374 9417 match(Set dst (AndL dst (LoadL mem)));
9375 9418 effect(KILL cr);
9376 9419 ins_cost(125);
9377 9420 format %{ "AND $dst.lo,$mem\n\t"
9378 9421 "AND $dst.hi,$mem+4" %}
9379 9422 opcode(0x23, 0x23);
9380 9423 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9381 9424 ins_pipe( ialu_reg_long_mem );
9382 9425 %}
9383 9426
9384 9427 // Or Long Register with Register
9385 9428 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9386 9429 match(Set dst (OrL dst src));
9387 9430 effect(KILL cr);
9388 9431 format %{ "OR $dst.lo,$src.lo\n\t"
9389 9432 "OR $dst.hi,$src.hi" %}
9390 9433 opcode(0x0B,0x0B);
9391 9434 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9392 9435 ins_pipe( ialu_reg_reg_long );
9393 9436 %}
9394 9437
9395 9438 // Or Long Register with Immediate
9396 9439 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9397 9440 match(Set dst (OrL dst src));
9398 9441 effect(KILL cr);
9399 9442 format %{ "OR $dst.lo,$src.lo\n\t"
9400 9443 "OR $dst.hi,$src.hi" %}
9401 9444 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9402 9445 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9403 9446 ins_pipe( ialu_reg_long );
9404 9447 %}
9405 9448
9406 9449 // Or Long Register with Memory
9407 9450 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9408 9451 match(Set dst (OrL dst (LoadL mem)));
9409 9452 effect(KILL cr);
9410 9453 ins_cost(125);
9411 9454 format %{ "OR $dst.lo,$mem\n\t"
9412 9455 "OR $dst.hi,$mem+4" %}
9413 9456 opcode(0x0B,0x0B);
9414 9457 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9415 9458 ins_pipe( ialu_reg_long_mem );
9416 9459 %}
9417 9460
9418 9461 // Xor Long Register with Register
9419 9462 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9420 9463 match(Set dst (XorL dst src));
9421 9464 effect(KILL cr);
9422 9465 format %{ "XOR $dst.lo,$src.lo\n\t"
9423 9466 "XOR $dst.hi,$src.hi" %}
9424 9467 opcode(0x33,0x33);
9425 9468 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9426 9469 ins_pipe( ialu_reg_reg_long );
9427 9470 %}
9428 9471
9429 9472 // Xor Long Register with Immediate -1
// Long ^ -1 is one's complement of both halves: NOT the low word and, via
// HIGH_FROM_LOW (maps a long's low-half register to its paired high-half
// register), NOT the high word.  NOT leaves EFLAGS alone, so no KILL cr.
9430 9473 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9431 9474   match(Set dst (XorL dst imm));  
9432 9475   format %{ "NOT    $dst.lo\n\t"
9433 9476             "NOT    $dst.hi" %}
9434 9477   ins_encode %{
9435 9478      __ notl($dst$$Register);
9436 9479      __ notl(HIGH_FROM_LOW($dst$$Register));
9437 9480   %}
9438 9481   ins_pipe( ialu_reg_long );
9439 9482 %}
9440 9483
9441 9484 // Xor Long Register with Immediate
9442 9485 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9443 9486 match(Set dst (XorL dst src));
9444 9487 effect(KILL cr);
9445 9488 format %{ "XOR $dst.lo,$src.lo\n\t"
9446 9489 "XOR $dst.hi,$src.hi" %}
9447 9490 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9448 9491 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9449 9492 ins_pipe( ialu_reg_long );
9450 9493 %}
9451 9494
9452 9495 // Xor Long Register with Memory
9453 9496 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9454 9497 match(Set dst (XorL dst (LoadL mem)));
9455 9498 effect(KILL cr);
9456 9499 ins_cost(125);
9457 9500 format %{ "XOR $dst.lo,$mem\n\t"
9458 9501 "XOR $dst.hi,$mem+4" %}
9459 9502 opcode(0x33,0x33);
9460 9503 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9461 9504 ins_pipe( ialu_reg_long_mem );
9462 9505 %}
9463 9506
9464 9507 // Shift Left Long by 1
9465 9508 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9466 9509 predicate(UseNewLongLShift);
9467 9510 match(Set dst (LShiftL dst cnt));
9468 9511 effect(KILL cr);
9469 9512 ins_cost(100);
9470 9513 format %{ "ADD $dst.lo,$dst.lo\n\t"
9471 9514 "ADC $dst.hi,$dst.hi" %}
9472 9515 ins_encode %{
9473 9516 __ addl($dst$$Register,$dst$$Register);
9474 9517 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9475 9518 %}
9476 9519 ins_pipe( ialu_reg_long );
9477 9520 %}
9478 9521
9479 9522 // Shift Left Long by 2
9480 9523 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9481 9524 predicate(UseNewLongLShift);
9482 9525 match(Set dst (LShiftL dst cnt));
9483 9526 effect(KILL cr);
9484 9527 ins_cost(100);
9485 9528 format %{ "ADD $dst.lo,$dst.lo\n\t"
9486 9529 "ADC $dst.hi,$dst.hi\n\t"
9487 9530 "ADD $dst.lo,$dst.lo\n\t"
9488 9531 "ADC $dst.hi,$dst.hi" %}
9489 9532 ins_encode %{
9490 9533 __ addl($dst$$Register,$dst$$Register);
9491 9534 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9492 9535 __ addl($dst$$Register,$dst$$Register);
9493 9536 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9494 9537 %}
9495 9538 ins_pipe( ialu_reg_long );
9496 9539 %}
9497 9540
9498 9541 // Shift Left Long by 3
9499 9542 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9500 9543 predicate(UseNewLongLShift);
9501 9544 match(Set dst (LShiftL dst cnt));
9502 9545 effect(KILL cr);
9503 9546 ins_cost(100);
9504 9547 format %{ "ADD $dst.lo,$dst.lo\n\t"
9505 9548 "ADC $dst.hi,$dst.hi\n\t"
9506 9549 "ADD $dst.lo,$dst.lo\n\t"
9507 9550 "ADC $dst.hi,$dst.hi\n\t"
9508 9551 "ADD $dst.lo,$dst.lo\n\t"
9509 9552 "ADC $dst.hi,$dst.hi" %}
9510 9553 ins_encode %{
9511 9554 __ addl($dst$$Register,$dst$$Register);
9512 9555 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9513 9556 __ addl($dst$$Register,$dst$$Register);
9514 9557 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9515 9558 __ addl($dst$$Register,$dst$$Register);
9516 9559 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9517 9560 %}
9518 9561 ins_pipe( ialu_reg_long );
9519 9562 %}
9520 9563
9521 9564 // Shift Left Long by 1-31
9522 9565 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9523 9566 match(Set dst (LShiftL dst cnt));
9524 9567 effect(KILL cr);
9525 9568 ins_cost(200);
9526 9569 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9527 9570 "SHL $dst.lo,$cnt" %}
9528 9571 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9529 9572 ins_encode( move_long_small_shift(dst,cnt) );
9530 9573 ins_pipe( ialu_reg_long );
9531 9574 %}
9532 9575
9533 9576 // Shift Left Long by 32-63
9534 9577 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9535 9578 match(Set dst (LShiftL dst cnt));
9536 9579 effect(KILL cr);
9537 9580 ins_cost(300);
9538 9581 format %{ "MOV $dst.hi,$dst.lo\n"
9539 9582 "\tSHL $dst.hi,$cnt-32\n"
9540 9583 "\tXOR $dst.lo,$dst.lo" %}
9541 9584 opcode(0xC1, 0x4); /* C1 /4 ib */
9542 9585 ins_encode( move_long_big_shift_clr(dst,cnt) );
9543 9586 ins_pipe( ialu_reg_long );
9544 9587 %}
9545 9588
9546 9589 // Shift Left Long by variable
// x86 shift/SHLD instructions mask the CL count to 5 bits, so a 64-bit
// shift by 32..63 cannot be done directly.  When bit 5 of the count is set
// (TEST $shift,32), first move lo into hi and zero lo; the following
// SHLD/SHL pair then completes the shift using count mod 32.
9547 9590 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9548 9591   match(Set dst (LShiftL dst shift));
9549 9592   effect(KILL cr);
9550 9593   ins_cost(500+200);
9551 9594   size(17);
9552 9595   format %{ "TEST   $shift,32\n\t"
9553 9596             "JEQ,s  small\n\t"
9554 9597             "MOV    $dst.hi,$dst.lo\n\t"
9555 9598             "XOR    $dst.lo,$dst.lo\n"
9556 9599     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9557 9600             "SHL    $dst.lo,$shift" %}
9558 9601   ins_encode( shift_left_long( dst, shift ) );
9559 9602   ins_pipe( pipe_slow );
9560 9603 %}
9561 9604
9562 9605 // Shift Right Long by 1-31
9563 9606 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9564 9607 match(Set dst (URShiftL dst cnt));
9565 9608 effect(KILL cr);
9566 9609 ins_cost(200);
9567 9610 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9568 9611 "SHR $dst.hi,$cnt" %}
9569 9612 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9570 9613 ins_encode( move_long_small_shift(dst,cnt) );
9571 9614 ins_pipe( ialu_reg_long );
9572 9615 %}
9573 9616
9574 9617 // Shift Right Long by 32-63
9575 9618 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9576 9619 match(Set dst (URShiftL dst cnt));
9577 9620 effect(KILL cr);
9578 9621 ins_cost(300);
9579 9622 format %{ "MOV $dst.lo,$dst.hi\n"
9580 9623 "\tSHR $dst.lo,$cnt-32\n"
9581 9624 "\tXOR $dst.hi,$dst.hi" %}
9582 9625 opcode(0xC1, 0x5); /* C1 /5 ib */
9583 9626 ins_encode( move_long_big_shift_clr(dst,cnt) );
9584 9627 ins_pipe( ialu_reg_long );
9585 9628 %}
9586 9629
9587 9630 // Shift Right Long by variable
9588 9631 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9589 9632 match(Set dst (URShiftL dst shift));
9590 9633 effect(KILL cr);
9591 9634 ins_cost(600);
9592 9635 size(17);
9593 9636 format %{ "TEST $shift,32\n\t"
9594 9637 "JEQ,s small\n\t"
9595 9638 "MOV $dst.lo,$dst.hi\n\t"
9596 9639 "XOR $dst.hi,$dst.hi\n"
9597 9640 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9598 9641 "SHR $dst.hi,$shift" %}
9599 9642 ins_encode( shift_right_long( dst, shift ) );
9600 9643 ins_pipe( pipe_slow );
9601 9644 %}
9602 9645
9603 9646 // Shift Right Long by 1-31
9604 9647 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9605 9648 match(Set dst (RShiftL dst cnt));
9606 9649 effect(KILL cr);
9607 9650 ins_cost(200);
9608 9651 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9609 9652 "SAR $dst.hi,$cnt" %}
9610 9653 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9611 9654 ins_encode( move_long_small_shift(dst,cnt) );
9612 9655 ins_pipe( ialu_reg_long );
9613 9656 %}
9614 9657
9615 9658 // Shift Right Long by 32-63
9616 9659 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9617 9660 match(Set dst (RShiftL dst cnt));
9618 9661 effect(KILL cr);
9619 9662 ins_cost(300);
9620 9663 format %{ "MOV $dst.lo,$dst.hi\n"
9621 9664 "\tSAR $dst.lo,$cnt-32\n"
9622 9665 "\tSAR $dst.hi,31" %}
9623 9666 opcode(0xC1, 0x7); /* C1 /7 ib */
9624 9667 ins_encode( move_long_big_shift_sign(dst,cnt) );
9625 9668 ins_pipe( ialu_reg_long );
9626 9669 %}
9627 9670
9628 9671 // Shift Right arithmetic Long by variable
9629 9672 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9630 9673 match(Set dst (RShiftL dst shift));
9631 9674 effect(KILL cr);
9632 9675 ins_cost(600);
9633 9676 size(18);
9634 9677 format %{ "TEST $shift,32\n\t"
9635 9678 "JEQ,s small\n\t"
9636 9679 "MOV $dst.lo,$dst.hi\n\t"
9637 9680 "SAR $dst.hi,31\n"
9638 9681 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9639 9682 "SAR $dst.hi,$shift" %}
9640 9683 ins_encode( shift_right_arith_long( dst, shift ) );
9641 9684 ins_pipe( pipe_slow );
9642 9685 %}
9643 9686
9644 9687
9645 9688 //----------Double Instructions------------------------------------------------
9646 9689 // Double Math
9647 9690
9648 9691 // Compare & branch
9649 9692
9650 9693 // P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes ZF/PF/CF directly (no FNSTSW/SAHF dance) and pops ST.
// An unordered result (NaN operand) sets PF; the JNP-guarded fixup forces
// CF so NaN compares as "less than".  The fixup presumably goes through
// AH/SAHF (body of cmpF_P6_fixup not visible here), which is why EAX is
// killed -- TODO confirm against the encoding definition.
9651 9694 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9652 9695   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9653 9696   match(Set cr (CmpD src1 src2));
9654 9697   effect(KILL rax);
9655 9698   ins_cost(150);
9656 9699   format %{ "FLD    $src1\n\t"
9657 9700             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9658 9701             "JNP    exit\n\t"
9659 9702             "MOV    ah,1       // saw a NaN, set CF\n\t"
9660 9703             "SAHF\n"
9661 9704      "exit:\tNOP               // avoid branch to branch" %}
9662 9705   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9663 9706   ins_encode( Push_Reg_D(src1),
9664 9707               OpcP, RegOpc(src2),
9665 9708               cmpF_P6_fixup );
9666 9709   ins_pipe( pipe_slow );
9667 9710 %}
9668 9711
9669 9712 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
9670 9713 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9671 9714 match(Set cr (CmpD src1 src2));
9672 9715 ins_cost(150);
9673 9716 format %{ "FLD $src1\n\t"
9674 9717 "FUCOMIP ST,$src2 // P6 instruction" %}
9675 9718 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9676 9719 ins_encode( Push_Reg_D(src1),
9677 9720 OpcP, RegOpc(src2));
9678 9721 ins_pipe( pipe_slow );
9679 9722 %}
9680 9723
9681 9724 // Compare & branch
9682 9725 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9683 9726 predicate(UseSSE<=1);
9684 9727 match(Set cr (CmpD src1 src2));
9685 9728 effect(KILL rax);
9686 9729 ins_cost(200);
9687 9730 format %{ "FLD $src1\n\t"
9688 9731 "FCOMp $src2\n\t"
9689 9732 "FNSTSW AX\n\t"
9690 9733 "TEST AX,0x400\n\t"
9691 9734 "JZ,s flags\n\t"
9692 9735 "MOV AH,1\t# unordered treat as LT\n"
9693 9736 "flags:\tSAHF" %}
9694 9737 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9695 9738 ins_encode( Push_Reg_D(src1),
9696 9739 OpcP, RegOpc(src2),
9697 9740 fpu_flags);
9698 9741 ins_pipe( pipe_slow );
9699 9742 %}
9700 9743
9701 9744 // Compare vs zero into -1,0,1
9702 9745 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
9703 9746 predicate(UseSSE<=1);
9704 9747 match(Set dst (CmpD3 src1 zero));
9705 9748 effect(KILL cr, KILL rax);
9706 9749 ins_cost(280);
9707 9750 format %{ "FTSTD $dst,$src1" %}
9708 9751 opcode(0xE4, 0xD9);
9709 9752 ins_encode( Push_Reg_D(src1),
9710 9753 OpcS, OpcP, PopFPU,
9711 9754 CmpF_Result(dst));
9712 9755 ins_pipe( pipe_slow );
9713 9756 %}
9714 9757
9715 9758 // Compare into -1,0,1
9716 9759 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
9717 9760 predicate(UseSSE<=1);
9718 9761 match(Set dst (CmpD3 src1 src2));
9719 9762 effect(KILL cr, KILL rax);
9720 9763 ins_cost(300);
9721 9764 format %{ "FCMPD $dst,$src1,$src2" %}
9722 9765 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9723 9766 ins_encode( Push_Reg_D(src1),
9724 9767 OpcP, RegOpc(src2),
9725 9768 CmpF_Result(dst));
9726 9769 ins_pipe( pipe_slow );
9727 9770 %}
9728 9771
9729 9772 // float compare and set condition codes in EFLAGS by XMM regs
9730 9773 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
9731 9774 predicate(UseSSE>=2);
9732 9775 match(Set cr (CmpD dst src));
9733 9776 effect(KILL rax);
9734 9777 ins_cost(125);
9735 9778 format %{ "COMISD $dst,$src\n"
9736 9779 "\tJNP exit\n"
9737 9780 "\tMOV ah,1 // saw a NaN, set CF\n"
9738 9781 "\tSAHF\n"
9739 9782 "exit:\tNOP // avoid branch to branch" %}
9740 9783 opcode(0x66, 0x0F, 0x2F);
9741 9784 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
9742 9785 ins_pipe( pipe_slow );
9743 9786 %}
9744 9787
9745 9788 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
9746 9789 predicate(UseSSE>=2);
9747 9790 match(Set cr (CmpD dst src));
9748 9791 ins_cost(100);
9749 9792 format %{ "COMISD $dst,$src" %}
9750 9793 opcode(0x66, 0x0F, 0x2F);
9751 9794 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
9752 9795 ins_pipe( pipe_slow );
9753 9796 %}
9754 9797
9755 9798 // float compare and set condition codes in EFLAGS by XMM regs
9756 9799 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
9757 9800 predicate(UseSSE>=2);
9758 9801 match(Set cr (CmpD dst (LoadD src)));
9759 9802 effect(KILL rax);
9760 9803 ins_cost(145);
9761 9804 format %{ "COMISD $dst,$src\n"
9762 9805 "\tJNP exit\n"
9763 9806 "\tMOV ah,1 // saw a NaN, set CF\n"
9764 9807 "\tSAHF\n"
9765 9808 "exit:\tNOP // avoid branch to branch" %}
9766 9809 opcode(0x66, 0x0F, 0x2F);
9767 9810 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
9768 9811 ins_pipe( pipe_slow );
9769 9812 %}
9770 9813
9771 9814 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
9772 9815 predicate(UseSSE>=2);
9773 9816 match(Set cr (CmpD dst (LoadD src)));
9774 9817 ins_cost(100);
9775 9818 format %{ "COMISD $dst,$src" %}
9776 9819 opcode(0x66, 0x0F, 0x2F);
9777 9820 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
9778 9821 ins_pipe( pipe_slow );
9779 9822 %}
9780 9823
9781 9824 // Compare into -1,0,1 in XMM
9782 9825 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
9783 9826 predicate(UseSSE>=2);
9784 9827 match(Set dst (CmpD3 src1 src2));
9785 9828 effect(KILL cr);
9786 9829 ins_cost(255);
9787 9830 format %{ "XOR $dst,$dst\n"
9788 9831 "\tCOMISD $src1,$src2\n"
9789 9832 "\tJP,s nan\n"
9790 9833 "\tJEQ,s exit\n"
9791 9834 "\tJA,s inc\n"
9792 9835 "nan:\tDEC $dst\n"
9793 9836 "\tJMP,s exit\n"
9794 9837 "inc:\tINC $dst\n"
9795 9838 "exit:"
9796 9839 %}
9797 9840 opcode(0x66, 0x0F, 0x2F);
9798 9841 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
9799 9842 CmpX_Result(dst));
9800 9843 ins_pipe( pipe_slow );
9801 9844 %}
9802 9845
9803 9846 // Compare into -1,0,1 in XMM and memory
9804 9847 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
9805 9848 predicate(UseSSE>=2);
9806 9849 match(Set dst (CmpD3 src1 (LoadD mem)));
9807 9850 effect(KILL cr);
9808 9851 ins_cost(275);
9809 9852 format %{ "COMISD $src1,$mem\n"
9810 9853 "\tMOV $dst,0\t\t# do not blow flags\n"
9811 9854 "\tJP,s nan\n"
9812 9855 "\tJEQ,s exit\n"
9813 9856 "\tJA,s inc\n"
9814 9857 "nan:\tDEC $dst\n"
9815 9858 "\tJMP,s exit\n"
9816 9859 "inc:\tINC $dst\n"
9817 9860 "exit:"
9818 9861 %}
9819 9862 opcode(0x66, 0x0F, 0x2F);
9820 9863 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
9821 9864 LdImmI(dst,0x0), CmpX_Result(dst));
9822 9865 ins_pipe( pipe_slow );
9823 9866 %}
9824 9867
9825 9868
9826 9869 instruct subD_reg(regD dst, regD src) %{
9827 9870 predicate (UseSSE <=1);
9828 9871 match(Set dst (SubD dst src));
9829 9872
9830 9873 format %{ "FLD $src\n\t"
9831 9874 "DSUBp $dst,ST" %}
9832 9875 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9833 9876 ins_cost(150);
9834 9877 ins_encode( Push_Reg_D(src),
9835 9878 OpcP, RegOpc(dst) );
9836 9879 ins_pipe( fpu_reg_reg );
9837 9880 %}
9838 9881
9839 9882 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9840 9883 predicate (UseSSE <=1);
9841 9884 match(Set dst (RoundDouble (SubD src1 src2)));
9842 9885 ins_cost(250);
9843 9886
9844 9887 format %{ "FLD $src2\n\t"
9845 9888 "DSUB ST,$src1\n\t"
9846 9889 "FSTP_D $dst\t# D-round" %}
9847 9890 opcode(0xD8, 0x5);
9848 9891 ins_encode( Push_Reg_D(src2),
9849 9892 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9850 9893 ins_pipe( fpu_mem_reg_reg );
9851 9894 %}
9852 9895
9853 9896
9854 9897 instruct subD_reg_mem(regD dst, memory src) %{
9855 9898 predicate (UseSSE <=1);
9856 9899 match(Set dst (SubD dst (LoadD src)));
9857 9900 ins_cost(150);
9858 9901
9859 9902 format %{ "FLD $src\n\t"
9860 9903 "DSUBp $dst,ST" %}
9861 9904 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9862 9905 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9863 9906 OpcP, RegOpc(dst) );
9864 9907 ins_pipe( fpu_reg_mem );
9865 9908 %}
9866 9909
9867 9910 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
9868 9911 predicate (UseSSE<=1);
9869 9912 match(Set dst (AbsD src));
9870 9913 ins_cost(100);
9871 9914 format %{ "FABS" %}
9872 9915 opcode(0xE1, 0xD9);
9873 9916 ins_encode( OpcS, OpcP );
9874 9917 ins_pipe( fpu_reg_reg );
9875 9918 %}
9876 9919
// Double absolute value in an XMM register: clear the sign bit by ANDing
// with a 0x7FFF... mask (encoding supplied by AbsXD_encoding).
9877 9920 instruct absXD_reg( regXD dst ) %{
9878 9921 predicate(UseSSE>=2);
9879 9922 match(Set dst (AbsD dst));
9880 9923 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9881 9924 ins_encode( AbsXD_encoding(dst));
9882 9925 ins_pipe( pipe_slow );
9883 9926 %}
9884 9927
9885 9928 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9886 9929 predicate(UseSSE<=1);
9887 9930 match(Set dst (NegD src));
9888 9931 ins_cost(100);
9889 9932 format %{ "FCHS" %}
9890 9933 opcode(0xE0, 0xD9);
9891 9934 ins_encode( OpcS, OpcP );
9892 9935 ins_pipe( fpu_reg_reg );
9893 9936 %}
9894 9937
// Double negate in an XMM register: flip the sign bit by XORing with the
// 0x8000... mask held in double_signflip_pool.
9895 9938 instruct negXD_reg( regXD dst ) %{
9896 9939 predicate(UseSSE>=2);
9897 9940 match(Set dst (NegD dst));
9898 9941 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9899 9942 ins_encode %{
9900 9943 __ xorpd($dst$$XMMRegister,
9901 9944 ExternalAddress((address)double_signflip_pool));
9902 9945 %}
9903 9946 ins_pipe( pipe_slow );
9904 9947 %}
9905 9948
// Double add, x87 path: FLD src then add-and-pop into dst (DE C0+i).
9906 9949 instruct addD_reg(regD dst, regD src) %{
9907 9950 predicate(UseSSE<=1); // x87 only; SSE2 path is addXD_reg
9908 9951 match(Set dst (AddD dst src));
9909 9952 format %{ "FLD $src\n\t"
9910 9953 "DADD $dst,ST" %}
9911 9954 size(4); // FLD (2 bytes) + FADDP (2 bytes)
9912 9955 ins_cost(150);
9913 9956 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9914 9957 ins_encode( Push_Reg_D(src),
9915 9958 OpcP, RegOpc(dst) );
9916 9959 ins_pipe( fpu_reg_reg );
9917 9960 %}
9918 9961
9919 9962
9920 9963 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9921 9964 predicate(UseSSE<=1);
9922 9965 match(Set dst (RoundDouble (AddD src1 src2)));
9923 9966 ins_cost(250);
9924 9967
9925 9968 format %{ "FLD $src2\n\t"
9926 9969 "DADD ST,$src1\n\t"
9927 9970 "FSTP_D $dst\t# D-round" %}
9928 9971 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9929 9972 ins_encode( Push_Reg_D(src2),
9930 9973 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
9931 9974 ins_pipe( fpu_mem_reg_reg );
9932 9975 %}
9933 9976
9934 9977
9935 9978 instruct addD_reg_mem(regD dst, memory src) %{
9936 9979 predicate(UseSSE<=1);
9937 9980 match(Set dst (AddD dst (LoadD src)));
9938 9981 ins_cost(150);
9939 9982
9940 9983 format %{ "FLD $src\n\t"
9941 9984 "DADDp $dst,ST" %}
9942 9985 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9943 9986 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9944 9987 OpcP, RegOpc(dst) );
9945 9988 ins_pipe( fpu_reg_mem );
9946 9989 %}
9947 9990
9948 9991 // add-to-memory
9949 9992 instruct addD_mem_reg(memory dst, regD src) %{
9950 9993 predicate(UseSSE<=1);
9951 9994 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9952 9995 ins_cost(150);
9953 9996
9954 9997 format %{ "FLD_D $dst\n\t"
9955 9998 "DADD ST,$src\n\t"
9956 9999 "FST_D $dst" %}
9957 10000 opcode(0xDD, 0x0);
9958 10001 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9959 10002 Opcode(0xD8), RegOpc(src),
9960 10003 set_instruction_start,
9961 10004 Opcode(0xDD), RMopc_Mem(0x03,dst) );
9962 10005 ins_pipe( fpu_reg_mem );
9963 10006 %}
9964 10007
9965 10008 instruct addD_reg_imm1(regD dst, immD1 src) %{
9966 10009 predicate(UseSSE<=1);
9967 10010 match(Set dst (AddD dst src));
9968 10011 ins_cost(125);
9969 10012 format %{ "FLD1\n\t"
9970 10013 "DADDp $dst,ST" %}
9971 10014 opcode(0xDE, 0x00);
9972 10015 ins_encode( LdImmD(src),
9973 10016 OpcP, RegOpc(dst) );
9974 10017 ins_pipe( fpu_reg );
9975 10018 %}
9976 10019
9977 10020 instruct addD_reg_imm(regD dst, immD src) %{
9978 10021 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9979 10022 match(Set dst (AddD dst src));
9980 10023 ins_cost(200);
9981 10024 format %{ "FLD_D [$src]\n\t"
9982 10025 "DADDp $dst,ST" %}
9983 10026 opcode(0xDE, 0x00); /* DE /0 */
9984 10027 ins_encode( LdImmD(src),
9985 10028 OpcP, RegOpc(dst));
9986 10029 ins_pipe( fpu_reg_mem );
9987 10030 %}
9988 10031
9989 10032 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
9990 10033 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9991 10034 match(Set dst (RoundDouble (AddD src con)));
9992 10035 ins_cost(200);
9993 10036 format %{ "FLD_D [$con]\n\t"
9994 10037 "DADD ST,$src\n\t"
9995 10038 "FSTP_D $dst\t# D-round" %}
9996 10039 opcode(0xD8, 0x00); /* D8 /0 */
9997 10040 ins_encode( LdImmD(con),
9998 10041 OpcP, RegOpc(src), Pop_Mem_D(dst));
9999 10042 ins_pipe( fpu_mem_reg_con );
10000 10043 %}
10001 10044
10002 10045 // Add two double precision floating point values in xmm
10003 10046 // reg-reg, reg-constant and reg-memory forms; all emit ADDSD (F2 0F 58).
10003 10046 instruct addXD_reg(regXD dst, regXD src) %{
10004 10047 predicate(UseSSE>=2);
10005 10048 match(Set dst (AddD dst src));
10006 10049 format %{ "ADDSD $dst,$src" %}
10007 10050 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10008 10051 ins_pipe( pipe_slow );
10009 10052 %}
10010 10053 
10011 10054 // Add a constant-pool double: operand is materialized via LdImmXD.
10011 10054 instruct addXD_imm(regXD dst, immXD con) %{
10012 10055 predicate(UseSSE>=2);
10013 10056 match(Set dst (AddD dst con));
10014 10057 format %{ "ADDSD $dst,[$con]" %}
10015 10058 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
10016 10059 ins_pipe( pipe_slow );
10017 10060 %}
10018 10061 
10019 10062 // Add directly from memory (folds the LoadD into the ADDSD).
10019 10062 instruct addXD_mem(regXD dst, memory mem) %{
10020 10063 predicate(UseSSE>=2);
10021 10064 match(Set dst (AddD dst (LoadD mem)));
10022 10065 format %{ "ADDSD $dst,$mem" %}
10023 10066 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10024 10067 ins_pipe( pipe_slow );
10025 10068 %}
10026 10069
10027 10070 // Sub two double precision floating point values in xmm
10028 10071 // reg-reg, reg-constant and reg-memory forms; all emit SUBSD (F2 0F 5C).
10028 10071 instruct subXD_reg(regXD dst, regXD src) %{
10029 10072 predicate(UseSSE>=2);
10030 10073 match(Set dst (SubD dst src));
10031 10074 format %{ "SUBSD $dst,$src" %}
10032 10075 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10033 10076 ins_pipe( pipe_slow );
10034 10077 %}
10035 10078 
10036 10079 // Subtract a constant-pool double loaded through LdImmXD.
10036 10079 instruct subXD_imm(regXD dst, immXD con) %{
10037 10080 predicate(UseSSE>=2);
10038 10081 match(Set dst (SubD dst con));
10039 10082 format %{ "SUBSD $dst,[$con]" %}
10040 10083 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
10041 10084 ins_pipe( pipe_slow );
10042 10085 %}
10043 10086 
10044 10087 // Subtract directly from memory (folds the LoadD into the SUBSD).
10044 10087 instruct subXD_mem(regXD dst, memory mem) %{
10045 10088 predicate(UseSSE>=2);
10046 10089 match(Set dst (SubD dst (LoadD mem)));
10047 10090 format %{ "SUBSD $dst,$mem" %}
10048 10091 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10049 10092 ins_pipe( pipe_slow );
10050 10093 %}
10051 10094
10052 10095 // Mul two double precision floating point values in xmm
10053 10096 // reg-reg, reg-constant and reg-memory forms; all emit MULSD (F2 0F 59).
10053 10096 instruct mulXD_reg(regXD dst, regXD src) %{
10054 10097 predicate(UseSSE>=2);
10055 10098 match(Set dst (MulD dst src));
10056 10099 format %{ "MULSD $dst,$src" %}
10057 10100 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10058 10101 ins_pipe( pipe_slow );
10059 10102 %}
10060 10103 
10061 10104 // Multiply by a constant-pool double loaded through LdImmXD.
10061 10104 instruct mulXD_imm(regXD dst, immXD con) %{
10062 10105 predicate(UseSSE>=2);
10063 10106 match(Set dst (MulD dst con));
10064 10107 format %{ "MULSD $dst,[$con]" %}
10065 10108 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
10066 10109 ins_pipe( pipe_slow );
10067 10110 %}
10068 10111 
10069 10112 // Multiply directly from memory (folds the LoadD into the MULSD).
10069 10112 instruct mulXD_mem(regXD dst, memory mem) %{
10070 10113 predicate(UseSSE>=2);
10071 10114 match(Set dst (MulD dst (LoadD mem)));
10072 10115 format %{ "MULSD $dst,$mem" %}
10073 10116 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10074 10117 ins_pipe( pipe_slow );
10075 10118 %}
10076 10119
10077 10120 // Div two double precision floating point values in xmm
10078 10121 // reg-reg, reg-constant and reg-memory forms; all emit DIVSD (F2 0F 5E).
10078 10121 instruct divXD_reg(regXD dst, regXD src) %{
10079 10122 predicate(UseSSE>=2);
10080 10123 match(Set dst (DivD dst src));
10081 10124 format %{ "DIVSD $dst,$src" %}
10082 10125 opcode(0xF2, 0x0F, 0x5E); // NOTE(review): appears unused — ins_encode below emits the bytes explicitly, and the sibling addXD/subXD/mulXD instructs carry no opcode() line; verify before removing
10083 10126 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10084 10127 ins_pipe( pipe_slow );
10085 10128 %}
10086 10129 
10087 10130 // Divide by a constant-pool double loaded through LdImmXD.
10087 10130 instruct divXD_imm(regXD dst, immXD con) %{
10088 10131 predicate(UseSSE>=2);
10089 10132 match(Set dst (DivD dst con));
10090 10133 format %{ "DIVSD $dst,[$con]" %}
10091 10134 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
10092 10135 ins_pipe( pipe_slow );
10093 10136 %}
10094 10137 
10095 10138 // Divide directly from memory (folds the LoadD into the DIVSD).
10095 10138 instruct divXD_mem(regXD dst, memory mem) %{
10096 10139 predicate(UseSSE>=2);
10097 10140 match(Set dst (DivD dst (LoadD mem)));
10098 10141 format %{ "DIVSD $dst,$mem" %}
10099 10142 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10100 10143 ins_pipe( pipe_slow );
10101 10144 %}
10102 10145
10103 10146
10104 10147 instruct mulD_reg(regD dst, regD src) %{
10105 10148 predicate(UseSSE<=1);
10106 10149 match(Set dst (MulD dst src));
10107 10150 format %{ "FLD $src\n\t"
10108 10151 "DMULp $dst,ST" %}
10109 10152 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10110 10153 ins_cost(150);
10111 10154 ins_encode( Push_Reg_D(src),
10112 10155 OpcP, RegOpc(dst) );
10113 10156 ins_pipe( fpu_reg_reg );
10114 10157 %}
10115 10158
10116 10159 // Strict FP instruction biases argument before multiply then
10117 10160 // biases result to avoid double rounding of subnormals.
10118 10161 //
10119 10162 // scale arg1 by multiplying arg1 by 2^(-15360)
10120 10163 // load arg2
10121 10164 // multiply scaled arg1 by arg2
10122 10165 // rescale product by 2^(15360)
10123 10166 //
// Strict-FP double multiply: pre-scales dst by the subnormal bias constant,
// multiplies by src, then rescales — avoiding double rounding of subnormals
// (see the comment block above). Only selected for strictfp methods.
10124 10167 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
10125 10168 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10126 10169 match(Set dst (MulD dst src));
10127 10170 ins_cost(1); // Select this instruction for all strict FP double multiplies
10128 10171 
10129 10172 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10130 10173 "DMULp $dst,ST\n\t"
10131 10174 "FLD $src\n\t"
10132 10175 "DMULp $dst,ST\n\t"
10133 10176 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10134 10177 "DMULp $dst,ST\n\t" %}
10135 10178 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10136 10179 ins_encode( strictfp_bias1(dst),
10137 10180 Push_Reg_D(src),
10138 10181 OpcP, RegOpc(dst),
10139 10182 strictfp_bias2(dst) );
10140 10183 ins_pipe( fpu_reg_reg );
10141 10184 %}
10142 10185
10143 10186 instruct mulD_reg_imm(regD dst, immD src) %{
10144 10187 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10145 10188 match(Set dst (MulD dst src));
10146 10189 ins_cost(200);
10147 10190 format %{ "FLD_D [$src]\n\t"
10148 10191 "DMULp $dst,ST" %}
10149 10192 opcode(0xDE, 0x1); /* DE /1 */
10150 10193 ins_encode( LdImmD(src),
10151 10194 OpcP, RegOpc(dst) );
10152 10195 ins_pipe( fpu_reg_mem );
10153 10196 %}
10154 10197
10155 10198
10156 10199 instruct mulD_reg_mem(regD dst, memory src) %{
10157 10200 predicate( UseSSE<=1 );
10158 10201 match(Set dst (MulD dst (LoadD src)));
10159 10202 ins_cost(200);
10160 10203 format %{ "FLD_D $src\n\t"
10161 10204 "DMULp $dst,ST" %}
10162 10205 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
10163 10206 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10164 10207 OpcP, RegOpc(dst) );
10165 10208 ins_pipe( fpu_reg_mem );
10166 10209 %}
10167 10210
10168 10211 //
10169 10212 // Cisc-alternate to reg-reg multiply
10170 10213 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
10171 10214 predicate( UseSSE<=1 );
10172 10215 match(Set dst (MulD src (LoadD mem)));
10173 10216 ins_cost(250);
10174 10217 format %{ "FLD_D $mem\n\t"
10175 10218 "DMUL ST,$src\n\t"
10176 10219 "FSTP_D $dst" %}
10177 10220 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10178 10221 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10179 10222 OpcReg_F(src),
10180 10223 Pop_Reg_D(dst) );
10181 10224 ins_pipe( fpu_reg_reg_mem );
10182 10225 %}
10183 10226
10184 10227
10185 10228 // MACRO3 -- addD a mulD
10186 10229 // This instruction is a '2-address' instruction in that the result goes
10187 10230 // back to src2. This eliminates a move from the macro; possibly the
10188 10231 // register allocator will have to add it back (and maybe not).
10189 10232 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
10190 10233 predicate( UseSSE<=1 );
10191 10234 match(Set src2 (AddD (MulD src0 src1) src2));
10192 10235 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10193 10236 "DMUL ST,$src1\n\t"
10194 10237 "DADDp $src2,ST" %}
10195 10238 ins_cost(250);
10196 10239 opcode(0xDD); /* LoadD DD /0 */
10197 10240 ins_encode( Push_Reg_F(src0),
10198 10241 FMul_ST_reg(src1),
10199 10242 FAddP_reg_ST(src2) );
10200 10243 ins_pipe( fpu_reg_reg_reg );
10201 10244 %}
10202 10245
10203 10246
10204 10247 // MACRO3 -- subD a mulD
10205 10248 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
10206 10249 predicate( UseSSE<=1 );
10207 10250 match(Set src2 (SubD (MulD src0 src1) src2));
10208 10251 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10209 10252 "DMUL ST,$src1\n\t"
10210 10253 "DSUBRp $src2,ST" %}
10211 10254 ins_cost(250);
10212 10255 ins_encode( Push_Reg_F(src0),
10213 10256 FMul_ST_reg(src1),
10214 10257 Opcode(0xDE), Opc_plus(0xE0,src2));
10215 10258 ins_pipe( fpu_reg_reg_reg );
10216 10259 %}
10217 10260
10218 10261
// Double divide, x87 path: FLD src then divide-and-pop into dst (DE F8+i).
10219 10262 instruct divD_reg(regD dst, regD src) %{
10220 10263 predicate( UseSSE<=1 ); // x87 only; SSE2 path is divXD_reg
10221 10264 match(Set dst (DivD dst src));
10222 10265 
10223 10266 format %{ "FLD $src\n\t"
10224 10267 "FDIVp $dst,ST" %}
10225 10268 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10226 10269 ins_cost(150);
10227 10270 ins_encode( Push_Reg_D(src),
10228 10271 OpcP, RegOpc(dst) );
10229 10272 ins_pipe( fpu_reg_reg );
10230 10273 %}
10231 10274
10232 10275 // Strict FP instruction biases argument before division then
10233 10276 // biases result, to avoid double rounding of subnormals.
10234 10277 //
10235 10278 // scale dividend by multiplying dividend by 2^(-15360)
10236 10279 // load divisor
10237 10280 // divide scaled dividend by divisor
10238 10281 // rescale quotient by 2^(15360)
10239 10282 //
// Strict-FP double divide: pre-scales the dividend by the subnormal bias
// constant, divides by src, then rescales — avoiding double rounding of
// subnormals (see the comment block above). Only selected for strictfp methods.
10240 10283 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
// Fixed: the block carried two predicate() statements; the bare
// "UseSSE<=1" one is implied by the combined strict-FP predicate kept
// below, matching the single-predicate form of strictfp_mulD_reg.
10242 10285 match(Set dst (DivD dst src));
10243 10286 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
ins_cost(1); // Fixed: was the octal-style literal 01; select this instruction for all strict FP double divides (cf. strictfp_mulD_reg)
10245 10288 
10246 10289 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10247 10290 "DMULp $dst,ST\n\t"
10248 10291 "FLD $src\n\t"
10249 10292 "FDIVp $dst,ST\n\t"
10250 10293 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10251 10294 "DMULp $dst,ST\n\t" %}
10252 10295 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10253 10296 ins_encode( strictfp_bias1(dst),
10254 10297 Push_Reg_D(src),
10255 10298 OpcP, RegOpc(dst),
10256 10299 strictfp_bias2(dst) );
10257 10300 ins_pipe( fpu_reg_reg );
10258 10301 %}
10259 10302
10260 10303 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10261 10304 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10262 10305 match(Set dst (RoundDouble (DivD src1 src2)));
10263 10306
10264 10307 format %{ "FLD $src1\n\t"
10265 10308 "FDIV ST,$src2\n\t"
10266 10309 "FSTP_D $dst\t# D-round" %}
10267 10310 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10268 10311 ins_encode( Push_Reg_D(src1),
10269 10312 OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10270 10313 ins_pipe( fpu_mem_reg_reg );
10271 10314 %}
10272 10315
10273 10316
10274 10317 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
10275 10318 predicate(UseSSE<=1);
10276 10319 match(Set dst (ModD dst src));
10277 10320 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10278 10321
10279 10322 format %{ "DMOD $dst,$src" %}
10280 10323 ins_cost(250);
10281 10324 ins_encode(Push_Reg_Mod_D(dst, src),
10282 10325 emitModD(),
10283 10326 Push_Result_Mod_D(src),
10284 10327 Pop_Reg_D(dst));
10285 10328 ins_pipe( pipe_slow );
10286 10329 %}
10287 10330
// Double remainder for SSE2: there is no SSE remainder instruction, so the
// operands are spilled to the stack, FPREM is iterated on the x87 unit until
// the partial-remainder flag (C2, bit 0x400 -> parity after SAHF) clears,
// and the result is moved back to an XMM register.
10288 10331 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
10289 10332 predicate(UseSSE>=2);
10290 10333 match(Set dst (ModD src0 src1));
10291 10334 effect(KILL rax, KILL cr); // FNSTSW AX / SAHF clobber EAX and EFLAGS
10292 10335 
10293 10336 format %{ "SUB ESP,8\t # DMOD\n"
10294 10337 "\tMOVSD [ESP+0],$src1\n"
10295 10338 "\tFLD_D [ESP+0]\n"
10296 10339 "\tMOVSD [ESP+0],$src0\n"
10297 10340 "\tFLD_D [ESP+0]\n"
10298 10341 "loop:\tFPREM\n"
10299 10342 "\tFWAIT\n"
10300 10343 "\tFNSTSW AX\n"
10301 10344 "\tSAHF\n"
10302 10345 "\tJP loop\n"
10303 10346 "\tFSTP_D [ESP+0]\n"
10304 10347 "\tMOVSD $dst,[ESP+0]\n"
10305 10348 "\tADD ESP,8\n"
10306 10349 "\tFSTP ST0\t # Restore FPU Stack"
10307 10350 %}
10308 10351 ins_cost(250);
10309 10352 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10310 10353 ins_pipe( pipe_slow );
10311 10354 %}
10312 10355
10313 10356 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
10314 10357 predicate (UseSSE<=1);
10315 10358 match(Set dst (SinD src));
10316 10359 ins_cost(1800);
10317 10360 format %{ "DSIN $dst" %}
10318 10361 opcode(0xD9, 0xFE);
10319 10362 ins_encode( OpcP, OpcS );
10320 10363 ins_pipe( pipe_slow );
10321 10364 %}
10322 10365
// SinD for SSE2: no SSE sine instruction exists, so the XMM value is pushed
// onto the x87 stack, FSIN (D9 FE) is executed, and the result is popped
// back into the XMM register.
10323 10366 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
10324 10367 predicate (UseSSE>=2);
10325 10368 match(Set dst (SinD dst));
10326 10369 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10327 10370 ins_cost(1800);
10328 10371 format %{ "DSIN $dst" %}
10329 10372 opcode(0xD9, 0xFE);
10330 10373 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10331 10374 ins_pipe( pipe_slow );
10332 10375 %}
10333 10376
10334 10377 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
10335 10378 predicate (UseSSE<=1);
10336 10379 match(Set dst (CosD src));
10337 10380 ins_cost(1800);
10338 10381 format %{ "DCOS $dst" %}
10339 10382 opcode(0xD9, 0xFF);
10340 10383 ins_encode( OpcP, OpcS );
10341 10384 ins_pipe( pipe_slow );
10342 10385 %}
10343 10386
10344 10387 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
10345 10388 predicate (UseSSE>=2);
10346 10389 match(Set dst (CosD dst));
10347 10390 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10348 10391 ins_cost(1800);
10349 10392 format %{ "DCOS $dst" %}
10350 10393 opcode(0xD9, 0xFF);
10351 10394 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10352 10395 ins_pipe( pipe_slow );
10353 10396 %}
10354 10397
10355 10398 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
10356 10399 predicate (UseSSE<=1);
10357 10400 match(Set dst(TanD src));
10358 10401 format %{ "DTAN $dst" %}
10359 10402 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10360 10403 Opcode(0xDD), Opcode(0xD8)); // fstp st
10361 10404 ins_pipe( pipe_slow );
10362 10405 %}
10363 10406
10364 10407 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
10365 10408 predicate (UseSSE>=2);
10366 10409 match(Set dst(TanD dst));
10367 10410 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10368 10411 format %{ "DTAN $dst" %}
10369 10412 ins_encode( Push_SrcXD(dst),
10370 10413 Opcode(0xD9), Opcode(0xF2), // fptan
10371 10414 Opcode(0xDD), Opcode(0xD8), // fstp st
10372 10415 Push_ResultXD(dst) );
10373 10416 ins_pipe( pipe_slow );
10374 10417 %}
10375 10418
10376 10419 instruct atanD_reg(regD dst, regD src) %{
10377 10420 predicate (UseSSE<=1);
10378 10421 match(Set dst(AtanD dst src));
10379 10422 format %{ "DATA $dst,$src" %}
10380 10423 opcode(0xD9, 0xF3);
10381 10424 ins_encode( Push_Reg_D(src),
10382 10425 OpcP, OpcS, RegOpc(dst) );
10383 10426 ins_pipe( pipe_slow );
10384 10427 %}
10385 10428
10386 10429 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10387 10430 predicate (UseSSE>=2);
10388 10431 match(Set dst(AtanD dst src));
10389 10432 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10390 10433 format %{ "DATA $dst,$src" %}
10391 10434 opcode(0xD9, 0xF3);
10392 10435 ins_encode( Push_SrcXD(src),
10393 10436 OpcP, OpcS, Push_ResultXD(dst) );
10394 10437 ins_pipe( pipe_slow );
10395 10438 %}
10396 10439
10397 10440 instruct sqrtD_reg(regD dst, regD src) %{
10398 10441 predicate (UseSSE<=1);
10399 10442 match(Set dst (SqrtD src));
10400 10443 format %{ "DSQRT $dst,$src" %}
10401 10444 opcode(0xFA, 0xD9);
10402 10445 ins_encode( Push_Reg_D(src),
10403 10446 OpcS, OpcP, Pop_Reg_D(dst) );
10404 10447 ins_pipe( pipe_slow );
10405 10448 %}
10406 10449
10407 10450 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10408 10451 predicate (UseSSE<=1);
10409 10452 match(Set Y (PowD X Y)); // Raise X to the Yth power
10410 10453 effect(KILL rax, KILL rbx, KILL rcx);
10411 10454 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10412 10455 "FLD_D $X\n\t"
10413 10456 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10414 10457
10415 10458 "FDUP \t\t\t# Q Q\n\t"
10416 10459 "FRNDINT\t\t\t# int(Q) Q\n\t"
10417 10460 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10418 10461 "FISTP dword [ESP]\n\t"
10419 10462 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10420 10463 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10421 10464 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10422 10465 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10423 10466 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10424 10467 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10425 10468 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10426 10469 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10427 10470 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10428 10471 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10429 10472 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10430 10473 "MOV [ESP+0],0\n\t"
10431 10474 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10432 10475
10433 10476 "ADD ESP,8"
10434 10477 %}
10435 10478 ins_encode( push_stack_temp_qword,
10436 10479 Push_Reg_D(X),
10437 10480 Opcode(0xD9), Opcode(0xF1), // fyl2x
10438 10481 pow_exp_core_encoding,
10439 10482 pop_stack_temp_qword);
10440 10483 ins_pipe( pipe_slow );
10441 10484 %}
10442 10485
10443 10486 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10444 10487 predicate (UseSSE>=2);
10445 10488 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10446 10489 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10447 10490 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10448 10491 "MOVSD [ESP],$src1\n\t"
10449 10492 "FLD FPR1,$src1\n\t"
10450 10493 "MOVSD [ESP],$src0\n\t"
10451 10494 "FLD FPR1,$src0\n\t"
10452 10495 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10453 10496
10454 10497 "FDUP \t\t\t# Q Q\n\t"
10455 10498 "FRNDINT\t\t\t# int(Q) Q\n\t"
10456 10499 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10457 10500 "FISTP dword [ESP]\n\t"
10458 10501 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10459 10502 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10460 10503 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10461 10504 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10462 10505 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10463 10506 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10464 10507 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10465 10508 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10466 10509 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10467 10510 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10468 10511 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10469 10512 "MOV [ESP+0],0\n\t"
10470 10513 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10471 10514
10472 10515 "FST_D [ESP]\n\t"
10473 10516 "MOVSD $dst,[ESP]\n\t"
10474 10517 "ADD ESP,8"
10475 10518 %}
10476 10519 ins_encode( push_stack_temp_qword,
10477 10520 push_xmm_to_fpr1(src1),
10478 10521 push_xmm_to_fpr1(src0),
10479 10522 Opcode(0xD9), Opcode(0xF1), // fyl2x
10480 10523 pow_exp_core_encoding,
10481 10524 Push_ResultXD(dst) );
10482 10525 ins_pipe( pipe_slow );
10483 10526 %}
10484 10527
10485 10528
// ExpD on the x87 stack: e^x = 2^(x*log2(e)); FLDL2E/FMULP form Q, then the
// shared pow_exp_core_encoding splits Q into int+frac, computes 2^frac via
// F2XM1 and scales by 2^int(Q) through a hand-marshalled double on the stack.
10486 10529 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10487 10530 predicate (UseSSE<=1);
10488 10531 match(Set dpr1 (ExpD dpr1));
10489 10532 effect(KILL rax, KILL rbx, KILL rcx); // scaling sequence uses EAX/EBX/ECX
// Fixed: first fragment was missing the "\n\t" separator, running the first
// two lines of the debug listing together (cf. expXD_reg's format).
10490 10533 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10491 10534 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10492 10535 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10493 10536 
10494 10537 "FDUP \t\t\t# Q Q\n\t"
10495 10538 "FRNDINT\t\t\t# int(Q) Q\n\t"
10496 10539 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10497 10540 "FISTP dword [ESP]\n\t"
10498 10541 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10499 10542 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10500 10543 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10501 10544 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10502 10545 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10503 10546 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10504 10547 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10505 10548 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10506 10549 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10507 10550 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10508 10551 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10509 10552 "MOV [ESP+0],0\n\t"
10510 10553 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10511 10554 
10512 10555 "ADD ESP,8"
10513 10556 %}
10514 10557 ins_encode( push_stack_temp_qword,
10515 10558 Opcode(0xD9), Opcode(0xEA), // fldl2e
10516 10559 Opcode(0xDE), Opcode(0xC9), // fmulp
10517 10560 pow_exp_core_encoding,
10518 10561 pop_stack_temp_qword);
10519 10562 ins_pipe( pipe_slow );
10520 10563 %}
10521 10564
10522 10565 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10523 10566 predicate (UseSSE>=2);
10524 10567 match(Set dst (ExpD src));
10525 10568 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10526 10569 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10527 10570 "MOVSD [ESP],$src\n\t"
10528 10571 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10529 10572 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10530 10573
10531 10574 "FDUP \t\t\t# Q Q\n\t"
10532 10575 "FRNDINT\t\t\t# int(Q) Q\n\t"
10533 10576 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10534 10577 "FISTP dword [ESP]\n\t"
10535 10578 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10536 10579 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10537 10580 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10538 10581 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10539 10582 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10540 10583 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10541 10584 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10542 10585 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10543 10586 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10544 10587 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10545 10588 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10546 10589 "MOV [ESP+0],0\n\t"
10547 10590 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10548 10591
10549 10592 "FST_D [ESP]\n\t"
10550 10593 "MOVSD $dst,[ESP]\n\t"
10551 10594 "ADD ESP,8"
10552 10595 %}
10553 10596 ins_encode( Push_SrcXD(src),
10554 10597 Opcode(0xD9), Opcode(0xEA), // fldl2e
10555 10598 Opcode(0xDE), Opcode(0xC9), // fmulp
10556 10599 pow_exp_core_encoding,
10557 10600 Push_ResultXD(dst) );
10558 10601 ins_pipe( pipe_slow );
10559 10602 %}
10560 10603
10561 10604
10562 10605
// Log10D on the x87 stack: log10(x) = log10(2) * log2(x), computed as
// FLDLG2 / FXCH / FYL2X with the argument already on the FPU stack.
10563 10606 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
10564 10607 predicate (UseSSE<=1);
10565 10608 // The source Double operand on FPU stack
10566 10609 match(Set dst (Log10D src));
10567 10610 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10568 10611 // fxch ; swap ST(0) with ST(1)
10569 10612 // fyl2x ; compute log_10(2) * log_2(x)
10570 10613 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10571 10614 "FXCH \n\t"
10572 10615 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10573 10616 %}
10574 10617 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10575 10618 Opcode(0xD9), Opcode(0xC9), // fxch
10576 10619 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10577 10620 
10578 10621 ins_pipe( pipe_slow );
10579 10622 %}
10580 10623
10581 10624 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10582 10625 predicate (UseSSE>=2);
10583 10626 effect(KILL cr);
10584 10627 match(Set dst (Log10D src));
10585 10628 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10586 10629 // fyl2x ; compute log_10(2) * log_2(x)
10587 10630 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10588 10631 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10589 10632 %}
10590 10633 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10591 10634 Push_SrcXD(src),
10592 10635 Opcode(0xD9), Opcode(0xF1), // fyl2x
10593 10636 Push_ResultXD(dst));
10594 10637
10595 10638 ins_pipe( pipe_slow );
10596 10639 %}
10597 10640
10598 10641 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
10599 10642 predicate (UseSSE<=1);
10600 10643 // The source Double operand on FPU stack
10601 10644 match(Set dst (LogD src));
10602 10645 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10603 10646 // fxch ; swap ST(0) with ST(1)
10604 10647 // fyl2x ; compute log_e(2) * log_2(x)
10605 10648 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10606 10649 "FXCH \n\t"
10607 10650 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10608 10651 %}
10609 10652 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10610 10653 Opcode(0xD9), Opcode(0xC9), // fxch
10611 10654 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10612 10655
10613 10656 ins_pipe( pipe_slow );
10614 10657 %}
10615 10658
10616 10659 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10617 10660 predicate (UseSSE>=2);
10618 10661 effect(KILL cr);
10619 10662 // The source and result Double operands in XMM registers
10620 10663 match(Set dst (LogD src));
10621 10664 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10622 10665 // fyl2x ; compute log_e(2) * log_2(x)
10623 10666 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10624 10667 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10625 10668 %}
10626 10669 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10627 10670 Push_SrcXD(src),
10628 10671 Opcode(0xD9), Opcode(0xF1), // fyl2x
10629 10672 Push_ResultXD(dst));
10630 10673 ins_pipe( pipe_slow );
10631 10674 %}
10632 10675
10633 10676 //-------------Float Instructions-------------------------------
10634 10677 // Float Math
10635 10678
10636 10679 // Code for float compare:
10637 10680 // fcompp();
10638 10681 // fwait(); fnstsw_ax();
10639 10682 // sahf();
10640 10683 // movl(dst, unordered_result);
10641 10684 // jcc(Assembler::parity, exit);
10642 10685 // movl(dst, less_result);
10643 10686 // jcc(Assembler::below, exit);
10644 10687 // movl(dst, equal_result);
10645 10688 // jcc(Assembler::equal, exit);
10646 10689 // movl(dst, greater_result);
10647 10690 // exit:
10648 10691
10649 10692 // P6 version of float compare, sets condition codes in EFLAGS
// Float compare setting EFLAGS via the P6 FUCOMIP instruction; a fixup
// sequence forces the unordered (NaN) case to read as "less than" by
// setting CF through AH/SAHF.
10650 10693 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10651 10694 predicate(VM_Version::supports_cmov() && UseSSE == 0); // FUCOMIP/CMOV are P6+
10652 10695 match(Set cr (CmpF src1 src2));
10653 10696 effect(KILL rax); // NaN fixup writes AH
10654 10697 ins_cost(150);
10655 10698 format %{ "FLD $src1\n\t"
10656 10699 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10657 10700 "JNP exit\n\t"
10658 10701 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
10659 10702 "SAHF\n"
10660 10703 "exit:\tNOP // avoid branch to branch" %}
10661 10704 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10662 10705 ins_encode( Push_Reg_D(src1), // NOTE(review): double push helper used for a float operand; works on the x87 stack but confirm intentional (same pattern in cmpF_cc_P6CF below)
10663 10706 OpcP, RegOpc(src2),
10664 10707 cmpF_P6_fixup );
10665 10708 ins_pipe( pipe_slow );
10666 10709 %}
10667 10710
10668 10711 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
10669 10712 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10670 10713 match(Set cr (CmpF src1 src2));
10671 10714 ins_cost(100);
10672 10715 format %{ "FLD $src1\n\t"
10673 10716 "FUCOMIP ST,$src2 // P6 instruction" %}
10674 10717 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10675 10718 ins_encode( Push_Reg_D(src1),
10676 10719 OpcP, RegOpc(src2));
10677 10720 ins_pipe( pipe_slow );
10678 10721 %}
10679 10722
10680 10723
10681 10724 // Compare & branch
// Pre-P6 fallback: FCOMP + FNSTSW AX + SAHF to move x87 status into EFLAGS.
// Bit 0x400 of the FPU status word (C2) flags the unordered case, which is
// forced to "less than" before SAHF; EAX is clobbered as the transfer scratch.
10682 10725 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10683 10726   predicate(UseSSE == 0);
10684 10727   match(Set cr (CmpF src1 src2));
10685 10728   effect(KILL rax);
10686 10729   ins_cost(200);
10687 10730   format %{ "FLD $src1\n\t"
10688 10731             "FCOMp $src2\n\t"
10689 10732             "FNSTSW AX\n\t"
10690 10733             "TEST AX,0x400\n\t"
10691 10734             "JZ,s flags\n\t"
10692 10735             "MOV AH,1\t# unordered treat as LT\n"
10693 10736     "flags:\tSAHF" %}
10694 10737   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10695 10738   ins_encode( Push_Reg_D(src1),
10696 10739               OpcP, RegOpc(src2),
10697 10740               fpu_flags);
10698 10741   ins_pipe( pipe_slow );
10699 10742 %}
10700 10743 
10701 10744 // Compare vs zero into -1,0,1
// Three-way compare (CmpF3) against the 0.0 immediate using FTST;
// CmpF_Result materializes -1/0/1 into the integer dst.
10702 10745 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
10703 10746   predicate(UseSSE == 0);
10704 10747   match(Set dst (CmpF3 src1 zero));
10705 10748   effect(KILL cr, KILL rax);
10706 10749   ins_cost(280);
10707 10750   format %{ "FTSTF $dst,$src1" %}
10708 10751   opcode(0xE4, 0xD9);
10709 10752   ins_encode( Push_Reg_D(src1),
10710 10753               OpcS, OpcP, PopFPU,
10711 10754               CmpF_Result(dst));
10712 10755   ins_pipe( pipe_slow );
10713 10756 %}
10714 10757 
10715 10758 // Compare into -1,0,1
// General reg-reg three-way compare; same -1/0/1 materialization as cmpF_0.
10716 10759 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
10717 10760   predicate(UseSSE == 0);
10718 10761   match(Set dst (CmpF3 src1 src2));
10719 10762   effect(KILL cr, KILL rax);
10720 10763   ins_cost(300);
10721 10764   format %{ "FCMPF $dst,$src1,$src2" %}
10722 10765   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10723 10766   ins_encode( Push_Reg_D(src1),
10724 10767               OpcP, RegOpc(src2),
10725 10768               CmpF_Result(dst));
10726 10769   ins_pipe( pipe_slow );
10727 10770 %}
10728 10771
10729 10772 // float compare and set condition codes in EFLAGS by XMM regs
// COMISS sets ZF/PF/CF directly; the unordered result (PF=1) is rewritten to
// "less than" by cmpF_P6_fixup (MOV ah,1 / SAHF), which clobbers EAX.
10730 10773 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
10731 10774   predicate(UseSSE>=1);
10732 10775   match(Set cr (CmpF dst src));
10733 10776   effect(KILL rax);
10734 10777   ins_cost(145);
10735 10778   format %{ "COMISS $dst,$src\n"
10736 10779           "\tJNP exit\n"
10737 10780           "\tMOV ah,1 // saw a NaN, set CF\n"
10738 10781           "\tSAHF\n"
10739 10782      "exit:\tNOP // avoid branch to branch" %}
10740 10783   opcode(0x0F, 0x2F);
10741 10784   ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
10742 10785   ins_pipe( pipe_slow );
10743 10786 %}
10744 10787 
// CF-only consumer variant: no NaN fixup, no EAX kill, lower cost.
10745 10788 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
10746 10789   predicate(UseSSE>=1);
10747 10790   match(Set cr (CmpF dst src));
10748 10791   ins_cost(100);
10749 10792   format %{ "COMISS $dst,$src" %}
10750 10793   opcode(0x0F, 0x2F);
10751 10794   ins_encode(OpcP, OpcS, RegReg(dst, src));
10752 10795   ins_pipe( pipe_slow );
10753 10796 %}
10754 10797 
10755 10798 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: folds the LoadF into COMISS.
10756 10799 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
10757 10800   predicate(UseSSE>=1);
10758 10801   match(Set cr (CmpF dst (LoadF src)));
10759 10802   effect(KILL rax);
10760 10803   ins_cost(165);
10761 10804   format %{ "COMISS $dst,$src\n"
10762 10805           "\tJNP exit\n"
10763 10806           "\tMOV ah,1 // saw a NaN, set CF\n"
10764 10807           "\tSAHF\n"
10765 10808      "exit:\tNOP // avoid branch to branch" %}
10766 10809   opcode(0x0F, 0x2F);
10767 10810   ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
10768 10811   ins_pipe( pipe_slow );
10769 10812 %}
10770 10813 
// CF-only consumer variant of the memory form.
10771 10814 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
10772 10815   predicate(UseSSE>=1);
10773 10816   match(Set cr (CmpF dst (LoadF src)));
10774 10817   ins_cost(100);
10775 10818   format %{ "COMISS $dst,$src" %}
10776 10819   opcode(0x0F, 0x2F);
10777 10820   ins_encode(OpcP, OpcS, RegMem(dst, src));
10778 10821   ins_pipe( pipe_slow );
10779 10822 %}
10780 10823
10781 10824 // Compare into -1,0,1 in XMM
// Three-way compare in SSE: dst is zeroed first, then COMISS plus a small
// branch ladder produces -1 (below or NaN), 0 (equal), or 1 (above).
10782 10825 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
10783 10826   predicate(UseSSE>=1);
10784 10827   match(Set dst (CmpF3 src1 src2));
10785 10828   effect(KILL cr);
10786 10829   ins_cost(255);
10787 10830   format %{ "XOR $dst,$dst\n"
10788 10831           "\tCOMISS $src1,$src2\n"
10789 10832           "\tJP,s nan\n"
10790 10833           "\tJEQ,s exit\n"
10791 10834           "\tJA,s inc\n"
10792 10835       "nan:\tDEC $dst\n"
10793 10836           "\tJMP,s exit\n"
10794 10837       "inc:\tINC $dst\n"
10795 10838      "exit:"
10796 10839   %}
10797 10840   opcode(0x0F, 0x2F);
10798 10841   ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
10799 10842   ins_pipe( pipe_slow );
10800 10843 %}
10801 10844 
10802 10845 // Compare into -1,0,1 in XMM and memory
// Memory form: dst cannot be xor-zeroed before COMISS folds the load, so a
// flag-preserving "MOV $dst,0" is emitted after the compare instead.
10803 10846 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
10804 10847   predicate(UseSSE>=1);
10805 10848   match(Set dst (CmpF3 src1 (LoadF mem)));
10806 10849   effect(KILL cr);
10807 10850   ins_cost(275);
10808 10851   format %{ "COMISS $src1,$mem\n"
10809 10852           "\tMOV $dst,0\t\t# do not blow flags\n"
10810 10853           "\tJP,s nan\n"
10811 10854           "\tJEQ,s exit\n"
10812 10855           "\tJA,s inc\n"
10813 10856       "nan:\tDEC $dst\n"
10814 10857           "\tJMP,s exit\n"
10815 10858       "inc:\tINC $dst\n"
10816 10859      "exit:"
10817 10860   %}
10818 10861   opcode(0x0F, 0x2F);
10819 10862   ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
10820 10863   ins_pipe( pipe_slow );
10821 10864 %}
10822 10865
10823 10866 // Spill to obtain 24-bit precision
// x87 registers compute in 80-bit extended precision; when strict 24-bit
// (single) rounding is selected, the result is stored to a stack slot
// (Pop_Mem_F) to force the rounding.  The *24* variants below implement that;
// their non-24 twins keep the result in a register and skip the round.
10824 10867 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10825 10868   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10826 10869   match(Set dst (SubF src1 src2));
10827 10870 
10828 10871   format %{ "FSUB $dst,$src1 - $src2" %}
10829 10872   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10830 10873   ins_encode( Push_Reg_F(src1),
10831 10874               OpcReg_F(src2),
10832 10875               Pop_Mem_F(dst) );
10833 10876   ins_pipe( fpu_mem_reg_reg );
10834 10877 %}
10835 10878 //
10836 10879 // This instruction does not round to 24-bits
10837 10880 instruct subF_reg(regF dst, regF src) %{
10838 10881   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10839 10882   match(Set dst (SubF dst src));
10840 10883 
10841 10884   format %{ "FSUB $dst,$src" %}
10842 10885   opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10843 10886   ins_encode( Push_Reg_F(src),
10844 10887               OpcP, RegOpc(dst) );
10845 10888   ins_pipe( fpu_reg_reg );
10846 10889 %}
10847 10890 
10848 10891 // Spill to obtain 24-bit precision
10849 10892 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
10850 10893   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10851 10894   match(Set dst (AddF src1 src2));
10852 10895 
10853 10896   format %{ "FADD $dst,$src1,$src2" %}
10854 10897   opcode(0xD8, 0x0); /* D8 C0+i */
10855 10898   ins_encode( Push_Reg_F(src2),
10856 10899               OpcReg_F(src1),
10857 10900               Pop_Mem_F(dst) );
10858 10901   ins_pipe( fpu_mem_reg_reg );
10859 10902 %}
10860 10903 //
10861 10904 // This instruction does not round to 24-bits
10862 10905 instruct addF_reg(regF dst, regF src) %{
10863 10906   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10864 10907   match(Set dst (AddF dst src));
10865 10908 
10866 10909   format %{ "FLD $src\n\t"
10867 10910             "FADDp $dst,ST" %}
10868 10911   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10869 10912   ins_encode( Push_Reg_F(src),
10870 10913               OpcP, RegOpc(dst) );
10871 10914   ins_pipe( fpu_reg_reg );
10872 10915 %}
10873 10916
10873 10916 // Add two single precision floating point values in xmm
// SSE scalar float arithmetic (UseSSE>=1): ADDSS/SUBSS/MULSS/DIVSS, each in
// three addressing forms — reg/reg, reg/constant-pool immediate (LdImmX),
// and reg/memory.  SSE rounds to single precision natively, so no 24-bit
// spill variants are needed here.  Opcode bytes are emitted inline: F3 0F xx.
10874 10917 instruct addX_reg(regX dst, regX src) %{
10875 10918   predicate(UseSSE>=1);
10876 10919   match(Set dst (AddF dst src));
10877 10920   format %{ "ADDSS $dst,$src" %}
10878 10921   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10879 10922   ins_pipe( pipe_slow );
10880 10923 %}
10881 10924 
10882 10925 instruct addX_imm(regX dst, immXF con) %{
10883 10926   predicate(UseSSE>=1);
10884 10927   match(Set dst (AddF dst con));
10885 10928   format %{ "ADDSS $dst,[$con]" %}
10886 10929   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
10887 10930   ins_pipe( pipe_slow );
10888 10931 %}
10889 10932 
10890 10933 instruct addX_mem(regX dst, memory mem) %{
10891 10934   predicate(UseSSE>=1);
10892 10935   match(Set dst (AddF dst (LoadF mem)));
10893 10936   format %{ "ADDSS $dst,$mem" %}
10894 10937   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
10895 10938   ins_pipe( pipe_slow );
10896 10939 %}
10897 10940 
10898 10941 // Subtract two single precision floating point values in xmm
10899 10942 instruct subX_reg(regX dst, regX src) %{
10900 10943   predicate(UseSSE>=1);
10901 10944   match(Set dst (SubF dst src));
10902 10945   format %{ "SUBSS $dst,$src" %}
10903 10946   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10904 10947   ins_pipe( pipe_slow );
10905 10948 %}
10906 10949 
10907 10950 instruct subX_imm(regX dst, immXF con) %{
10908 10951   predicate(UseSSE>=1);
10909 10952   match(Set dst (SubF dst con));
10910 10953   format %{ "SUBSS $dst,[$con]" %}
10911 10954   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
10912 10955   ins_pipe( pipe_slow );
10913 10956 %}
10914 10957 
10915 10958 instruct subX_mem(regX dst, memory mem) %{
10916 10959   predicate(UseSSE>=1);
10917 10960   match(Set dst (SubF dst (LoadF mem)));
10918 10961   format %{ "SUBSS $dst,$mem" %}
10919 10962   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10920 10963   ins_pipe( pipe_slow );
10921 10964 %}
10922 10965 
10923 10966 // Multiply two single precision floating point values in xmm
10924 10967 instruct mulX_reg(regX dst, regX src) %{
10925 10968   predicate(UseSSE>=1);
10926 10969   match(Set dst (MulF dst src));
10927 10970   format %{ "MULSS $dst,$src" %}
10928 10971   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10929 10972   ins_pipe( pipe_slow );
10930 10973 %}
10931 10974 
10932 10975 instruct mulX_imm(regX dst, immXF con) %{
10933 10976   predicate(UseSSE>=1);
10934 10977   match(Set dst (MulF dst con));
10935 10978   format %{ "MULSS $dst,[$con]" %}
10936 10979   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
10937 10980   ins_pipe( pipe_slow );
10938 10981 %}
10939 10982 
10940 10983 instruct mulX_mem(regX dst, memory mem) %{
10941 10984   predicate(UseSSE>=1);
10942 10985   match(Set dst (MulF dst (LoadF mem)));
10943 10986   format %{ "MULSS $dst,$mem" %}
10944 10987   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10945 10988   ins_pipe( pipe_slow );
10946 10989 %}
10947 10990 
10948 10991 // Divide two single precision floating point values in xmm
10949 10992 instruct divX_reg(regX dst, regX src) %{
10950 10993   predicate(UseSSE>=1);
10951 10994   match(Set dst (DivF dst src));
10952 10995   format %{ "DIVSS $dst,$src" %}
10953 10996   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10954 10997   ins_pipe( pipe_slow );
10955 10998 %}
10956 10999 
10957 11000 instruct divX_imm(regX dst, immXF con) %{
10958 11001   predicate(UseSSE>=1);
10959 11002   match(Set dst (DivF dst con));
10960 11003   format %{ "DIVSS $dst,[$con]" %}
10961 11004   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
10962 11005   ins_pipe( pipe_slow );
10963 11006 %}
10964 11007 
10965 11008 instruct divX_mem(regX dst, memory mem) %{
10966 11009   predicate(UseSSE>=1);
10967 11010   match(Set dst (DivF dst (LoadF mem)));
10968 11011   format %{ "DIVSS $dst,$mem" %}
10969 11012   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10970 11013   ins_pipe( pipe_slow );
10971 11014 %}
10973 11016
10973 11016 // Get the square root of a single precision floating point values in xmm
// Matches the ConvD2F(SqrtD(ConvF2D x)) pattern the ideal graph produces for
// a float sqrt (Java's Math.sqrt is double), collapsing it into one SQRTSS.
10974 11017 instruct sqrtX_reg(regX dst, regX src) %{
10975 11018   predicate(UseSSE>=1);
10976 11019   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10977 11020   format %{ "SQRTSS $dst,$src" %}
10978 11021   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
10979 11022   ins_pipe( pipe_slow );
10980 11023 %}
10981 11024 
10982 11025 instruct sqrtX_mem(regX dst, memory mem) %{
10983 11026   predicate(UseSSE>=1);
10984 11027   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10985 11028   format %{ "SQRTSS $dst,$mem" %}
10986 11029   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
10987 11030   ins_pipe( pipe_slow );
10988 11031 %}
10989 11032 
10990 11033 // Get the square root of a double precision floating point values in xmm
// Double sqrt needs SSE2 (F2 0F 51 = SQRTSD), hence UseSSE>=2.
10991 11034 instruct sqrtXD_reg(regXD dst, regXD src) %{
10992 11035   predicate(UseSSE>=2);
10993 11036   match(Set dst (SqrtD src));
10994 11037   format %{ "SQRTSD $dst,$src" %}
10995 11038   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
10996 11039   ins_pipe( pipe_slow );
10997 11040 %}
10998 11041 
10999 11042 instruct sqrtXD_mem(regXD dst, memory mem) %{
11000 11043   predicate(UseSSE>=2);
11001 11044   match(Set dst (SqrtD (LoadD mem)));
11002 11045   format %{ "SQRTSD $dst,$mem" %}
11003 11046   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11004 11047   ins_pipe( pipe_slow );
11005 11048 %}
11007 11050
// Absolute value / negation.  x87 forms operate in place on ST(0) (regFPR1);
// SSE forms use bit masks from the constant area: AND clears the sign bit for
// abs, XOR flips it for negate.
11008 11051 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11009 11052   predicate(UseSSE==0);
11010 11053   match(Set dst (AbsF src));
11011 11054   ins_cost(100);
11012 11055   format %{ "FABS" %}
11013 11056   opcode(0xE1, 0xD9);
11014 11057   ins_encode( OpcS, OpcP );
11015 11058   ins_pipe( fpu_reg_reg );
11016 11059 %}
11017 11060 
11018 11061 instruct absX_reg(regX dst ) %{
11019 11062   predicate(UseSSE>=1);
11020 11063   match(Set dst (AbsF dst));
11021 11064   format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11022 11065   ins_encode( AbsXF_encoding(dst));
11023 11066   ins_pipe( pipe_slow );
11024 11067 %}
11025 11068 
11026 11069 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11027 11070   predicate(UseSSE==0);
11028 11071   match(Set dst (NegF src));
11029 11072   ins_cost(100);
11030 11073   format %{ "FCHS" %}
11031 11074   opcode(0xE0, 0xD9);
11032 11075   ins_encode( OpcS, OpcP );
11033 11076   ins_pipe( fpu_reg_reg );
11034 11077 %}
11035 11078 
11036 11079 instruct negX_reg( regX dst ) %{
11037 11080   predicate(UseSSE>=1);
11038 11081   match(Set dst (NegF dst));
11039 11082   format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11040 11083   ins_encode( NegXF_encoding(dst));
11041 11084   ins_pipe( pipe_slow );
11042 11085 %}
11043 11086
11044 11087 // Cisc-alternate to addF_reg
11045 11088 // Spill to obtain 24-bit precision
// Memory-operand variants of float add: the LoadF is folded into the FPU
// instruction (tertiary opcode 0xD9 /0 loads the operand).  "24" variants
// round via Pop_Mem_F to a stack slot as elsewhere in this section.
11046 11089 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11047 11090   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11048 11091   match(Set dst (AddF src1 (LoadF src2)));
11049 11092 
11050 11093   format %{ "FLD $src2\n\t"
11051 11094             "FADD ST,$src1\n\t"
11052 11095             "FSTP_S $dst" %}
11053 11096   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11054 11097   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11055 11098               OpcReg_F(src1),
11056 11099               Pop_Mem_F(dst) );
11057 11100   ins_pipe( fpu_mem_reg_mem );
11058 11101 %}
11059 11102 //
11060 11103 // Cisc-alternate to addF_reg
11061 11104 // This instruction does not round to 24-bits
11062 11105 instruct addF_reg_mem(regF dst, memory src) %{
11063 11106   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11064 11107   match(Set dst (AddF dst (LoadF src)));
11065 11108 
11066 11109   format %{ "FADD $dst,$src" %}
11067 11110   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF D9 /0 */
11068 11111   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11069 11112               OpcP, RegOpc(dst) );
11070 11113   ins_pipe( fpu_reg_mem );
11071 11114 %}
11072 11115 
11073 11116 // // Following two instructions for _222_mpegaudio
11074 11117 // Spill to obtain 24-bit precision
11075 11118 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11076 11119   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11077 11120   match(Set dst (AddF src1 src2));
11078 11121 
11079 11122   format %{ "FADD $dst,$src1,$src2" %}
11080 11123   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11081 11124   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11082 11125               OpcReg_F(src2),
11083 11126               Pop_Mem_F(dst) );
11084 11127   ins_pipe( fpu_mem_reg_mem );
11085 11128 %}
11086 11129 
11087 11130 // Cisc-spill variant
11088 11131 // Spill to obtain 24-bit precision
// Both operands come from memory; set_instruction_start resets the encoder
// between the two memory-form FPU instructions being emitted.
11089 11132 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11090 11133   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11091 11134   match(Set dst (AddF src1 (LoadF src2)));
11092 11135 
11093 11136   format %{ "FADD $dst,$src1,$src2 cisc" %}
11094 11137   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11095 11138   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11096 11139               set_instruction_start,
11097 11140               OpcP, RMopc_Mem(secondary,src1),
11098 11141               Pop_Mem_F(dst) );
11099 11142   ins_pipe( fpu_mem_mem_mem );
11100 11143 %}
11101 11144 
11102 11145 // Spill to obtain 24-bit precision
11103 11146 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11104 11147   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11105 11148   match(Set dst (AddF src1 src2));
11106 11149 
11107 11150   format %{ "FADD $dst,$src1,$src2" %}
11108 11151   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
11109 11152   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11110 11153               set_instruction_start,
11111 11154               OpcP, RMopc_Mem(secondary,src1),
11112 11155               Pop_Mem_F(dst) );
11113 11156   ins_pipe( fpu_mem_mem_mem );
11114 11157 %}
11115 11158
11116 11159
11117 11160 // Spill to obtain 24-bit precision
// Add a float constant: Opc_MemImm_F addresses the constant in the
// constant table as a memory operand of FADD.
11118 11161 instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11119 11162   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11120 11163   match(Set dst (AddF src1 src2));
11121 11164   format %{ "FLD $src1\n\t"
11122 11165             "FADD $src2\n\t"
11123 11166             "FSTP_S $dst" %}
11124 11167   opcode(0xD8, 0x00); /* D8 /0 */
11125 11168   ins_encode( Push_Reg_F(src1),
11126 11169               Opc_MemImm_F(src2),
11127 11170               Pop_Mem_F(dst));
11128 11171   ins_pipe( fpu_mem_reg_con );
11129 11172 %}
11130 11173 //
11131 11174 // This instruction does not round to 24-bits
11132 11175 instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11133 11176   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134 11177   match(Set dst (AddF src1 src2));
11135 11178   format %{ "FLD $src1\n\t"
11136 11179             "FADD $src2\n\t"
11137 11180             "FSTP_S $dst" %}
11138 11181   opcode(0xD8, 0x00); /* D8 /0 */
11139 11182   ins_encode( Push_Reg_F(src1),
11140 11183               Opc_MemImm_F(src2),
11141 11184               Pop_Reg_F(dst));
11142 11185   ins_pipe( fpu_reg_reg_con );
11143 11186 %}
11144 11187
11145 11188 // Spill to obtain 24-bit precision
// Float multiply: same 24-bit-spill vs. register-result pairing as add/sub.
11146 11189 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11147 11190   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11148 11191   match(Set dst (MulF src1 src2));
11149 11192 
11150 11193   format %{ "FLD $src1\n\t"
11151 11194             "FMUL $src2\n\t"
11152 11195             "FSTP_S $dst" %}
11153 11196   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11154 11197   ins_encode( Push_Reg_F(src1),
11155 11198               OpcReg_F(src2),
11156 11199               Pop_Mem_F(dst) );
11157 11200   ins_pipe( fpu_mem_reg_reg );
11158 11201 %}
11159 11202 //
11160 11203 // This instruction does not round to 24-bits
11161 11204 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11162 11205   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11163 11206   match(Set dst (MulF src1 src2));
11164 11207 
11165 11208   format %{ "FLD $src1\n\t"
11166 11209             "FMUL $src2\n\t"
11167 11210             "FSTP_S $dst" %}
11168 11211   opcode(0xD8, 0x1); /* D8 C8+i */
11169 11212   ins_encode( Push_Reg_F(src2),
11170 11213               OpcReg_F(src1),
11171 11214               Pop_Reg_F(dst) );
11172 11215   ins_pipe( fpu_reg_reg_reg );
11173 11216 %}
11174 11217 
11175 11218 
11176 11219 // Spill to obtain 24-bit precision
11177 11220 // Cisc-alternate to reg-reg multiply
11178 11221 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11179 11222   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11180 11223   match(Set dst (MulF src1 (LoadF src2)));
11181 11224 
11182 11225   format %{ "FLD_S $src2\n\t"
11183 11226             "FMUL $src1\n\t"
11184 11227             "FSTP_S $dst" %}
11185 11228   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
11186 11229   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11187 11230               OpcReg_F(src1),
11188 11231               Pop_Mem_F(dst) );
11189 11232   ins_pipe( fpu_mem_reg_mem );
11190 11233 %}
11191 11234 //
11192 11235 // This instruction does not round to 24-bits
11193 11236 // Cisc-alternate to reg-reg multiply
11194 11237 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11195 11238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11196 11239   match(Set dst (MulF src1 (LoadF src2)));
11197 11240 
11198 11241   format %{ "FMUL $dst,$src1,$src2" %}
11199 11242   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
11200 11243   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11201 11244               OpcReg_F(src1),
11202 11245               Pop_Reg_F(dst) );
11203 11246   ins_pipe( fpu_reg_reg_mem );
11204 11247 %}
11205 11248
11206 11249 // Spill to obtain 24-bit precision
// Memory x memory multiply; set_instruction_start splits the two
// memory-form FPU instructions in the encoder.
11207 11250 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11208 11251   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11209 11252   match(Set dst (MulF src1 src2));
11210 11253 
11211 11254   format %{ "FMUL $dst,$src1,$src2" %}
11212 11255   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
11213 11256   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11214 11257               set_instruction_start,
11215 11258               OpcP, RMopc_Mem(secondary,src1),
11216 11259               Pop_Mem_F(dst) );
11217 11260   ins_pipe( fpu_mem_mem_mem );
11218 11261 %}
11219 11262 
11220 11263 // Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table (Opc_MemImm_F).
11221 11264 instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11222 11265   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11223 11266   match(Set dst (MulF src1 src2));
11224 11267 
11225 11268   format %{ "FMULc $dst,$src1,$src2" %}
11226 11269   opcode(0xD8, 0x1); /* D8 /1*/
11227 11270   ins_encode( Push_Reg_F(src1),
11228 11271               Opc_MemImm_F(src2),
11229 11272               Pop_Mem_F(dst));
11230 11273   ins_pipe( fpu_mem_reg_con );
11231 11274 %}
11232 11275 //
11233 11276 // This instruction does not round to 24-bits
// Multiply by a float constant, result stays in an FPU register (no 24-bit
// spill).  Format string fixed to "$dst,$src1,$src2": the original read
// "FMULc $dst. $src1, $src2" — a period instead of a comma and stray spaces,
// inconsistent with the sibling mulF24_reg_imm above.  This string is only
// the debug/PrintOptoAssembly disassembly text; encoding is unchanged.
11234 11277 instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11235 11278   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11236 11279   match(Set dst (MulF src1 src2));
11237 11280 
11238 11281   format %{ "FMULc $dst,$src1,$src2" %}
11239 11282   opcode(0xD8, 0x1); /* D8 /1*/
11240 11283   ins_encode( Push_Reg_F(src1),
11241 11284               Opc_MemImm_F(src2),
11242 11285               Pop_Reg_F(dst));
11243 11286   ins_pipe( fpu_reg_reg_con );
11244 11287 %}
11245 11288
11246 11289
11247 11290 //
11248 11291 // MACRO1 -- subsume unshared load into mulF
11249 11292 // This instruction does not round to 24-bits
// MACRO1..MACRO4: peephole-style matches over fused ideal subtrees
// (load+mul, mul+add, sub+div) so the whole expression is evaluated on the
// FPU stack without intermediate spills.  None of these round to 24 bits,
// hence the !select_24_bit_instr() predicates throughout.
11250 11293 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11251 11294   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11252 11295   match(Set dst (MulF (LoadF mem1) src));
11253 11296 
11254 11297   format %{ "FLD $mem1 ===MACRO1===\n\t"
11255 11298             "FMUL ST,$src\n\t"
11256 11299             "FSTP $dst" %}
11257 11300   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11258 11301   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11259 11302               OpcReg_F(src),
11260 11303               Pop_Reg_F(dst) );
11261 11304   ins_pipe( fpu_reg_reg_mem );
11262 11305 %}
11263 11306 //
11264 11307 // MACRO2 -- addF a mulF which subsumed an unshared load
11265 11308 // This instruction does not round to 24-bits
11266 11309 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11267 11310   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11268 11311   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11269 11312   ins_cost(95);
11270 11313 
11271 11314   format %{ "FLD $mem1 ===MACRO2===\n\t"
11272 11315             "FMUL ST,$src1 subsume mulF left load\n\t"
11273 11316             "FADD ST,$src2\n\t"
11274 11317             "FSTP $dst" %}
11275 11318   opcode(0xD9); /* LoadF D9 /0 */
11276 11319   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11277 11320               FMul_ST_reg(src1),
11278 11321               FAdd_ST_reg(src2),
11279 11322               Pop_Reg_F(dst) );
11280 11323   ins_pipe( fpu_reg_mem_reg_reg );
11281 11324 %}
11282 11325 
11283 11326 // MACRO3 -- addF a mulF
11284 11327 // This instruction does not round to 24-bits.  It is a '2-address'
11285 11328 // instruction in that the result goes back to src2.  This eliminates
11286 11329 // a move from the macro; possibly the register allocator will have
11287 11330 // to add it back (and maybe not).
11288 11331 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11289 11332   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11290 11333   match(Set src2 (AddF (MulF src0 src1) src2));
11291 11334 
11292 11335   format %{ "FLD $src0 ===MACRO3===\n\t"
11293 11336             "FMUL ST,$src1\n\t"
11294 11337             "FADDP $src2,ST" %}
11295 11338   opcode(0xD9); /* LoadF D9 /0 */
11296 11339   ins_encode( Push_Reg_F(src0),
11297 11340               FMul_ST_reg(src1),
11298 11341               FAddP_reg_ST(src2) );
11299 11342   ins_pipe( fpu_reg_reg_reg );
11300 11343 %}
11301 11344 
11302 11345 // MACRO4 -- divF subF
11303 11346 // This instruction does not round to 24-bits
11304 11347 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11305 11348   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11306 11349   match(Set dst (DivF (SubF src2 src1) src3));
11307 11350 
11308 11351   format %{ "FLD $src2 ===MACRO4===\n\t"
11309 11352             "FSUB ST,$src1\n\t"
11310 11353             "FDIV ST,$src3\n\t"
11311 11354             "FSTP $dst" %}
11312 11355   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11313 11356   ins_encode( Push_Reg_F(src2),
11314 11357               subF_divF_encode(src1,src3),
11315 11358               Pop_Reg_F(dst) );
11316 11359   ins_pipe( fpu_reg_reg_reg_reg );
11317 11360 %}
11318 11361
11319 11362 // Spill to obtain 24-bit precision
// Float divide: 24-bit-spill and register-result variants, mirroring sub/add.
11320 11363 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11321 11364   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11322 11365   match(Set dst (DivF src1 src2));
11323 11366 
11324 11367   format %{ "FDIV $dst,$src1,$src2" %}
11325 11368   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11326 11369   ins_encode( Push_Reg_F(src1),
11327 11370               OpcReg_F(src2),
11328 11371               Pop_Mem_F(dst) );
11329 11372   ins_pipe( fpu_mem_reg_reg );
11330 11373 %}
11331 11374 //
11332 11375 // This instruction does not round to 24-bits
11333 11376 instruct divF_reg(regF dst, regF src) %{
11334 11377   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11335 11378   match(Set dst (DivF dst src));
11336 11379 
11337 11380   format %{ "FDIV $dst,$src" %}
11338 11381   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11339 11382   ins_encode( Push_Reg_F(src),
11340 11383               OpcP, RegOpc(dst) );
11341 11384   ins_pipe( fpu_reg_reg );
11342 11385 %}
11343 11386
11344 11387
11345 11388 // Spill to obtain 24-bit precision
// Float remainder (ModF) via the shared emitModD() helper, which loops on
// x87 FPREM until the C2 status bit clears; it uses EAX/EFLAGS, so both
// are KILLed in every variant below.
11346 11389 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11347 11390   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11348 11391   match(Set dst (ModF src1 src2));
11349 11392   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11350 11393 
11351 11394   format %{ "FMOD $dst,$src1,$src2" %}
11352 11395   ins_encode( Push_Reg_Mod_D(src1, src2),
11353 11396               emitModD(),
11354 11397               Push_Result_Mod_D(src2),
11355 11398               Pop_Mem_F(dst));
11356 11399   ins_pipe( pipe_slow );
11357 11400 %}
11358 11401 //
11359 11402 // This instruction does not round to 24-bits
11360 11403 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11361 11404   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11362 11405   match(Set dst (ModF dst src));
11363 11406   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11364 11407 
11365 11408   format %{ "FMOD $dst,$src" %}
11366 11409   ins_encode(Push_Reg_Mod_D(dst, src),
11367 11410              emitModD(),
11368 11411              Push_Result_Mod_D(src),
11369 11412              Pop_Reg_F(dst));
11370 11413   ins_pipe( pipe_slow );
11371 11414 %}
11372 11415 
// SSE operands have no FPREM equivalent: bounce both values through the
// stack to the x87 unit, loop on FPREM, then move the result back to XMM.
11373 11416 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11374 11417   predicate(UseSSE>=1);
11375 11418   match(Set dst (ModF src0 src1));
11376 11419   effect(KILL rax, KILL cr);
11377 11420   format %{ "SUB ESP,4\t # FMOD\n"
11378 11421           "\tMOVSS [ESP+0],$src1\n"
11379 11422           "\tFLD_S [ESP+0]\n"
11380 11423           "\tMOVSS [ESP+0],$src0\n"
11381 11424           "\tFLD_S [ESP+0]\n"
11382 11425      "loop:\tFPREM\n"
11383 11426           "\tFWAIT\n"
11384 11427           "\tFNSTSW AX\n"
11385 11428           "\tSAHF\n"
11386 11429           "\tJP loop\n"
11387 11430           "\tFSTP_S [ESP+0]\n"
11388 11431           "\tMOVSS $dst,[ESP+0]\n"
11389 11432           "\tADD ESP,4\n"
11390 11433           "\tFSTP ST0\t # Restore FPU Stack"
11391 11434   %}
11392 11435   ins_cost(250);
11393 11436   ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11394 11437   ins_pipe( pipe_slow );
11395 11438 %}
11396 11439
11397 11440
11398 11441 //----------Arithmetic Conversion Instructions---------------------------------
11399 11442 // The conversions operations are all Alpha sorted.  Please keep it that way!
11400 11443 
// Round an 80-bit x87 value to single/double precision by storing it to a
// stack slot; these are also the expansion targets of convD2F/convF2D below.
11401 11444 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11402 11445   predicate(UseSSE==0);
11403 11446   match(Set dst (RoundFloat src));
11404 11447   ins_cost(125);
11405 11448   format %{ "FST_S $dst,$src\t# F-round" %}
11406 11449   ins_encode( Pop_Mem_Reg_F(dst, src) );
11407 11450   ins_pipe( fpu_mem_reg );
11408 11451 %}
11409 11452 
11410 11453 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11411 11454   predicate(UseSSE<=1);
11412 11455   match(Set dst (RoundDouble src));
11413 11456   ins_cost(125);
11414 11457   format %{ "FST_D $dst,$src\t# D-round" %}
11415 11458   ins_encode( Pop_Mem_Reg_D(dst, src) );
11416 11459   ins_pipe( fpu_mem_reg );
11417 11460 %}
11418 11461
11419 11462 // Force rounding to 24-bit precision and 6-bit exponent
// NOTE(review): IEEE single precision has an 8-bit exponent; "6-bit" here
// looks like a long-standing comment typo — confirm before relying on it.
11420 11463 instruct convD2F_reg(stackSlotF dst, regD src) %{
11421 11464   predicate(UseSSE==0);
11422 11465   match(Set dst (ConvD2F src));
11423 11466   format %{ "FST_S $dst,$src\t# F-round" %}
11424 11467   expand %{
11425 11468     roundFloat_mem_reg(dst,src);
11426 11469   %}
11427 11470 %}
11428 11471 
11429 11472 // Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: the source double lives in x87, the float result must land in
// XMM — store single to the stack from x87, reload with MOVSS.
11430 11473 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11431 11474   predicate(UseSSE==1);
11432 11475   match(Set dst (ConvD2F src));
11433 11476   effect( KILL cr );
11434 11477   format %{ "SUB ESP,4\n\t"
11435 11478             "FST_S [ESP],$src\t# F-round\n\t"
11436 11479             "MOVSS $dst,[ESP]\n\t"
11437 11480             "ADD ESP,4" %}
11438 11481   ins_encode( D2X_encoding(dst, src) );
11439 11482   ins_pipe( pipe_slow );
11440 11483 %}
11441 11484 
11442 11485 // Force rounding double precision to single precision
11443 11486 instruct convXD2X_reg(regX dst, regXD src) %{
11444 11487   predicate(UseSSE>=2);
11445 11488   match(Set dst (ConvD2F src));
11446 11489   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11447 11490   opcode(0xF2, 0x0F, 0x5A);
11448 11491   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11449 11492   ins_pipe( pipe_slow );
11450 11493 %}
11451 11494
// Float-to-double widening, one variant per UseSSE level (x87 only,
// x87->stack for UseSSE==1, and pure-SSE CVTSS2SD for UseSSE>=2).
11452 11495 instruct convF2D_reg_reg(regD dst, regF src) %{
11453 11496   predicate(UseSSE==0);
11454 11497   match(Set dst (ConvF2D src));
11455 11498   format %{ "FST_S $dst,$src\t# D-round" %}
11456 11499   ins_encode( Pop_Reg_Reg_D(dst, src));
11457 11500   ins_pipe( fpu_reg_reg );
11458 11501 %}
11459 11502 
11460 11503 instruct convF2D_reg(stackSlotD dst, regF src) %{
11461 11504   predicate(UseSSE==1);
11462 11505   match(Set dst (ConvF2D src));
11463 11506   format %{ "FST_D $dst,$src\t# D-round" %}
11464 11507   expand %{
11465 11508     roundDouble_mem_reg(dst,src);
11466 11509   %}
11467 11510 %}
11468 11511 
// UseSSE==1: float source is in XMM but the double result must be in x87 —
// spill the float to the stack, FLD_S it, and pop into the x87 dst.
11469 11512 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11470 11513   predicate(UseSSE==1);
11471 11514   match(Set dst (ConvF2D src));
11472 11515   effect( KILL cr );
11473 11516   format %{ "SUB ESP,4\n\t"
11474 11517             "MOVSS [ESP] $src\n\t"
11475 11518             "FLD_S [ESP]\n\t"
11476 11519             "ADD ESP,4\n\t"
11477 11520             "FSTP $dst\t# D-round" %}
11478 11521   ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11479 11522   ins_pipe( pipe_slow );
11480 11523 %}
11481 11524 
11482 11525 instruct convX2XD_reg(regXD dst, regX src) %{
11483 11526   predicate(UseSSE>=2);
11484 11527   match(Set dst (ConvF2D src));
11485 11528   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11486 11529   opcode(0xF3, 0x0F, 0x5A);
11487 11530   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11488 11531   ins_pipe( pipe_slow );
11489 11532 %}
11490 11533
11491 11534 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double->int (UseSSE<=1). Switches the FPU control word to truncating
// rounding (Java semantics), stores the value as an int on the stack, then
// restores the control word. The stored value is compared against
// 0x80000000 -- the "integer indefinite" pattern FIST produces on
// overflow/NaN (per Intel SDM) -- and on a match the slow-path
// d2i_wrapper stub is called to apply Java's corner-case rules.
// Result is pinned to EAX; EDX and flags are clobbered by the sequence.
11492 11535 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11493 11536 predicate(UseSSE<=1);
11494 11537 match(Set dst (ConvD2I src));
11495 11538 effect( KILL tmp, KILL cr );
11496 11539 format %{ "FLD $src\t# Convert double to int \n\t"
11497 11540 "FLDCW trunc mode\n\t"
11498 11541 "SUB ESP,4\n\t"
11499 11542 "FISTp [ESP + #0]\n\t"
11500 11543 "FLDCW std/24-bit mode\n\t"
11501 11544 "POP EAX\n\t"
11502 11545 "CMP EAX,0x80000000\n\t"
11503 11546 "JNE,s fast\n\t"
11504 11547 "FLD_D $src\n\t"
11505 11548 "CALL d2i_wrapper\n"
11506 11549 "fast:" %}
11507 11550 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11508 11551 ins_pipe( pipe_slow );
11509 11552 %}
11510 11553
11511 11554 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 double->int. CVTTSD2SI (F2 0F 2C) truncates directly; it yields
// 0x80000000 on overflow/NaN (per Intel SDM), so that sentinel is checked
// and, if hit, the double is pushed back onto the x87 stack and the
// d2i_wrapper stub applies Java's corner-case semantics.
// Result is pinned to EAX; EDX and flags are clobbered.
11512 11555 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11513 11556 predicate(UseSSE>=2);
11514 11557 match(Set dst (ConvD2I src));
11515 11558 effect( KILL tmp, KILL cr );
11516 11559 format %{ "CVTTSD2SI $dst, $src\n\t"
11517 11560 "CMP $dst,0x80000000\n\t"
11518 11561 "JNE,s fast\n\t"
11519 11562 "SUB ESP, 8\n\t"
11520 11563 "MOVSD [ESP], $src\n\t"
11521 11564 "FLD_D [ESP]\n\t"
11522 11565 "ADD ESP, 8\n\t"
11523 11566 "CALL d2i_wrapper\n"
11524 11567 "fast:" %}
11525 11568 opcode(0x1); // double-precision conversion
11526 11569 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11527 11570 ins_pipe( pipe_slow );
11528 11571 %}
11529 11572
11530 11573 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11531 11574 predicate(UseSSE<=1);
11532 11575 match(Set dst (ConvD2L src));
11533 11576 effect( KILL cr );
11534 11577 format %{ "FLD $src\t# Convert double to long\n\t"
11535 11578 "FLDCW trunc mode\n\t"
11536 11579 "SUB ESP,8\n\t"
11537 11580 "FISTp [ESP + #0]\n\t"
11538 11581 "FLDCW std/24-bit mode\n\t"
11539 11582 "POP EAX\n\t"
11540 11583 "POP EDX\n\t"
11541 11584 "CMP EDX,0x80000000\n\t"
11542 11585 "JNE,s fast\n\t"
11543 11586 "TEST EAX,EAX\n\t"
11544 11587 "JNE,s fast\n\t"
11545 11588 "FLD $src\n\t"
11546 11589 "CALL d2l_wrapper\n"
11547 11590 "fast:" %}
11548 11591 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11549 11592 ins_pipe( pipe_slow );
11550 11593 %}
11551 11594
11552 11595 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11553 11596 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11554 11597 predicate (UseSSE>=2);
11555 11598 match(Set dst (ConvD2L src));
11556 11599 effect( KILL cr );
11557 11600 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11558 11601 "MOVSD [ESP],$src\n\t"
11559 11602 "FLD_D [ESP]\n\t"
11560 11603 "FLDCW trunc mode\n\t"
11561 11604 "FISTp [ESP + #0]\n\t"
11562 11605 "FLDCW std/24-bit mode\n\t"
11563 11606 "POP EAX\n\t"
11564 11607 "POP EDX\n\t"
11565 11608 "CMP EDX,0x80000000\n\t"
11566 11609 "JNE,s fast\n\t"
11567 11610 "TEST EAX,EAX\n\t"
11568 11611 "JNE,s fast\n\t"
11569 11612 "SUB ESP,8\n\t"
11570 11613 "MOVSD [ESP],$src\n\t"
11571 11614 "FLD_D [ESP]\n\t"
11572 11615 "CALL d2l_wrapper\n"
11573 11616 "fast:" %}
11574 11617 ins_encode( XD2L_encoding(src) );
11575 11618 ins_pipe( pipe_slow );
11576 11619 %}
11577 11620
11578 11621 // Convert a float to an int. Java semantics require we do complex
11579 11622 // manglations in the corner cases. So we set the rounding mode to
11580 11623 // 'zero', store the darned float down as an int, and reset the
11581 11624 // rounding mode to 'nearest'. The hardware stores a flag value down
11582 11625 // if we would overflow or converted a NAN; we check for this
11583 11626 // and go the slow path if needed.
11584 11627 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11585 11628 predicate(UseSSE==0);
11586 11629 match(Set dst (ConvF2I src));
11587 11630 effect( KILL tmp, KILL cr );
11588 11631 format %{ "FLD $src\t# Convert float to int \n\t"
11589 11632 "FLDCW trunc mode\n\t"
11590 11633 "SUB ESP,4\n\t"
11591 11634 "FISTp [ESP + #0]\n\t"
11592 11635 "FLDCW std/24-bit mode\n\t"
11593 11636 "POP EAX\n\t"
11594 11637 "CMP EAX,0x80000000\n\t"
11595 11638 "JNE,s fast\n\t"
11596 11639 "FLD $src\n\t"
11597 11640 "CALL d2i_wrapper\n"
11598 11641 "fast:" %}
11599 11642 // D2I_encoding works for F2I
11600 11643 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11601 11644 ins_pipe( pipe_slow );
11602 11645 %}
11603 11646
11604 11647 // Convert a float in xmm to an int reg.
11605 11648 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11606 11649 predicate(UseSSE>=1);
11607 11650 match(Set dst (ConvF2I src));
11608 11651 effect( KILL tmp, KILL cr );
11609 11652 format %{ "CVTTSS2SI $dst, $src\n\t"
11610 11653 "CMP $dst,0x80000000\n\t"
11611 11654 "JNE,s fast\n\t"
11612 11655 "SUB ESP, 4\n\t"
11613 11656 "MOVSS [ESP], $src\n\t"
11614 11657 "FLD [ESP]\n\t"
11615 11658 "ADD ESP, 4\n\t"
11616 11659 "CALL d2i_wrapper\n"
11617 11660 "fast:" %}
11618 11661 opcode(0x0); // single-precision conversion
11619 11662 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11620 11663 ins_pipe( pipe_slow );
11621 11664 %}
11622 11665
11623 11666 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11624 11667 predicate(UseSSE==0);
11625 11668 match(Set dst (ConvF2L src));
11626 11669 effect( KILL cr );
11627 11670 format %{ "FLD $src\t# Convert float to long\n\t"
11628 11671 "FLDCW trunc mode\n\t"
11629 11672 "SUB ESP,8\n\t"
11630 11673 "FISTp [ESP + #0]\n\t"
11631 11674 "FLDCW std/24-bit mode\n\t"
11632 11675 "POP EAX\n\t"
11633 11676 "POP EDX\n\t"
11634 11677 "CMP EDX,0x80000000\n\t"
11635 11678 "JNE,s fast\n\t"
11636 11679 "TEST EAX,EAX\n\t"
11637 11680 "JNE,s fast\n\t"
11638 11681 "FLD $src\n\t"
11639 11682 "CALL d2l_wrapper\n"
11640 11683 "fast:" %}
11641 11684 // D2L_encoding works for F2L
11642 11685 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11643 11686 ins_pipe( pipe_slow );
11644 11687 %}
11645 11688
11646 11689 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11647 11690 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11648 11691 predicate (UseSSE>=1);
11649 11692 match(Set dst (ConvF2L src));
11650 11693 effect( KILL cr );
11651 11694 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11652 11695 "MOVSS [ESP],$src\n\t"
11653 11696 "FLD_S [ESP]\n\t"
11654 11697 "FLDCW trunc mode\n\t"
11655 11698 "FISTp [ESP + #0]\n\t"
11656 11699 "FLDCW std/24-bit mode\n\t"
11657 11700 "POP EAX\n\t"
11658 11701 "POP EDX\n\t"
11659 11702 "CMP EDX,0x80000000\n\t"
11660 11703 "JNE,s fast\n\t"
11661 11704 "TEST EAX,EAX\n\t"
11662 11705 "JNE,s fast\n\t"
11663 11706 "SUB ESP,4\t# Convert float to long\n\t"
11664 11707 "MOVSS [ESP],$src\n\t"
11665 11708 "FLD_S [ESP]\n\t"
11666 11709 "ADD ESP,4\n\t"
11667 11710 "CALL d2l_wrapper\n"
11668 11711 "fast:" %}
11669 11712 ins_encode( X2L_encoding(src) );
11670 11713 ins_pipe( pipe_slow );
11671 11714 %}
11672 11715
11673 11716 instruct convI2D_reg(regD dst, stackSlotI src) %{
11674 11717 predicate( UseSSE<=1 );
11675 11718 match(Set dst (ConvI2D src));
11676 11719 format %{ "FILD $src\n\t"
11677 11720 "FSTP $dst" %}
11678 11721 opcode(0xDB, 0x0); /* DB /0 */
11679 11722 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11680 11723 ins_pipe( fpu_reg_mem );
11681 11724 %}
11682 11725
11683 11726 instruct convI2XD_reg(regXD dst, eRegI src) %{
11684 11727 predicate( UseSSE>=2 && !UseXmmI2D );
11685 11728 match(Set dst (ConvI2D src));
11686 11729 format %{ "CVTSI2SD $dst,$src" %}
11687 11730 opcode(0xF2, 0x0F, 0x2A);
11688 11731 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11689 11732 ins_pipe( pipe_slow );
11690 11733 %}
11691 11734
11692 11735 instruct convI2XD_mem(regXD dst, memory mem) %{
11693 11736 predicate( UseSSE>=2 );
11694 11737 match(Set dst (ConvI2D (LoadI mem)));
11695 11738 format %{ "CVTSI2SD $dst,$mem" %}
11696 11739 opcode(0xF2, 0x0F, 0x2A);
11697 11740 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
11698 11741 ins_pipe( pipe_slow );
11699 11742 %}
11700 11743
11701 11744 instruct convXI2XD_reg(regXD dst, eRegI src)
11702 11745 %{
11703 11746 predicate( UseSSE>=2 && UseXmmI2D );
11704 11747 match(Set dst (ConvI2D src));
11705 11748
11706 11749 format %{ "MOVD $dst,$src\n\t"
11707 11750 "CVTDQ2PD $dst,$dst\t# i2d" %}
11708 11751 ins_encode %{
11709 11752 __ movdl($dst$$XMMRegister, $src$$Register);
11710 11753 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11711 11754 %}
11712 11755 ins_pipe(pipe_slow); // XXX
11713 11756 %}
11714 11757
11715 11758 instruct convI2D_mem(regD dst, memory mem) %{
11716 11759 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11717 11760 match(Set dst (ConvI2D (LoadI mem)));
11718 11761 format %{ "FILD $mem\n\t"
11719 11762 "FSTP $dst" %}
11720 11763 opcode(0xDB); /* DB /0 */
11721 11764 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11722 11765 Pop_Reg_D(dst));
11723 11766 ins_pipe( fpu_reg_mem );
11724 11767 %}
11725 11768
11726 11769 // Convert a byte to a float; no rounding step needed.
11727 11770 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
11728 11771 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11729 11772 match(Set dst (ConvI2F src));
11730 11773 format %{ "FILD $src\n\t"
11731 11774 "FSTP $dst" %}
11732 11775
11733 11776 opcode(0xDB, 0x0); /* DB /0 */
11734 11777 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
11735 11778 ins_pipe( fpu_reg_mem );
11736 11779 %}
11737 11780
11738 11781 // In 24-bit mode, force exponent rounding by storing back out
11739 11782 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
11740 11783 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11741 11784 match(Set dst (ConvI2F src));
11742 11785 ins_cost(200);
11743 11786 format %{ "FILD $src\n\t"
11744 11787 "FSTP_S $dst" %}
11745 11788 opcode(0xDB, 0x0); /* DB /0 */
11746 11789 ins_encode( Push_Mem_I(src),
11747 11790 Pop_Mem_F(dst));
11748 11791 ins_pipe( fpu_mem_mem );
11749 11792 %}
11750 11793
11751 11794 // In 24-bit mode, force exponent rounding by storing back out
11752 11795 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
11753 11796 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11754 11797 match(Set dst (ConvI2F (LoadI mem)));
11755 11798 ins_cost(200);
11756 11799 format %{ "FILD $mem\n\t"
11757 11800 "FSTP_S $dst" %}
11758 11801 opcode(0xDB); /* DB /0 */
11759 11802 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11760 11803 Pop_Mem_F(dst));
11761 11804 ins_pipe( fpu_mem_mem );
11762 11805 %}
11763 11806
11764 11807 // This instruction does not round to 24-bits
11765 11808 instruct convI2F_reg(regF dst, stackSlotI src) %{
11766 11809 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11767 11810 match(Set dst (ConvI2F src));
11768 11811 format %{ "FILD $src\n\t"
11769 11812 "FSTP $dst" %}
11770 11813 opcode(0xDB, 0x0); /* DB /0 */
11771 11814 ins_encode( Push_Mem_I(src),
11772 11815 Pop_Reg_F(dst));
11773 11816 ins_pipe( fpu_reg_mem );
11774 11817 %}
11775 11818
11776 11819 // This instruction does not round to 24-bits
11777 11820 instruct convI2F_mem(regF dst, memory mem) %{
11778 11821 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11779 11822 match(Set dst (ConvI2F (LoadI mem)));
11780 11823 format %{ "FILD $mem\n\t"
11781 11824 "FSTP $dst" %}
11782 11825 opcode(0xDB); /* DB /0 */
11783 11826 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11784 11827 Pop_Reg_F(dst));
11785 11828 ins_pipe( fpu_reg_mem );
11786 11829 %}
11787 11830
11788 11831 // Convert an int to a float in xmm; no rounding step needed.
11789 11832 instruct convI2X_reg(regX dst, eRegI src) %{
11790 11833 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11791 11834 match(Set dst (ConvI2F src));
11792 11835 format %{ "CVTSI2SS $dst, $src" %}
11793 11836
11794 11837 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
11795 11838 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11796 11839 ins_pipe( pipe_slow );
11797 11840 %}
11798 11841
11799 11842 instruct convXI2X_reg(regX dst, eRegI src)
11800 11843 %{
11801 11844 predicate( UseSSE>=2 && UseXmmI2F );
11802 11845 match(Set dst (ConvI2F src));
11803 11846
11804 11847 format %{ "MOVD $dst,$src\n\t"
11805 11848 "CVTDQ2PS $dst,$dst\t# i2f" %}
11806 11849 ins_encode %{
11807 11850 __ movdl($dst$$XMMRegister, $src$$Register);
11808 11851 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11809 11852 %}
11810 11853 ins_pipe(pipe_slow); // XXX
11811 11854 %}
11812 11855
11813 11856 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11814 11857 match(Set dst (ConvI2L src));
11815 11858 effect(KILL cr);
11816 11859 ins_cost(375);
11817 11860 format %{ "MOV $dst.lo,$src\n\t"
11818 11861 "MOV $dst.hi,$src\n\t"
11819 11862 "SAR $dst.hi,31" %}
11820 11863 ins_encode(convert_int_long(dst,src));
11821 11864 ins_pipe( ialu_reg_reg_long );
11822 11865 %}
11823 11866
11824 11867 // Zero-extend convert int to long
// Matches the (AndL (ConvI2L src) 0xFFFFFFFF) idiom as a zero-extension:
// copy src into the low half and XOR-clear the high half, avoiding the
// sign-extending MOV/MOV/SAR sequence of plain ConvI2L (cost 250 vs 375).
// XOR sets EFLAGS, hence KILL flags.
11825 11868 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
11826 11869 match(Set dst (AndL (ConvI2L src) mask) );
11827 11870 effect( KILL flags );
11828 11871 ins_cost(250);
11829 11872 format %{ "MOV $dst.lo,$src\n\t"
11830 11873 "XOR $dst.hi,$dst.hi" %}
11831 11874 opcode(0x33); // XOR
11832 11875 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11833 11876 ins_pipe( ialu_reg_reg_long );
11834 11877 %}
11835 11878
11836 11879 // Zero-extend long
// (AndL src 0xFFFFFFFF) on a long source: keep the low 32 bits by copying
// src.lo and XOR-clearing the high half. Same encoding scheme as
// convI2L_reg_zex above. XOR sets EFLAGS, hence KILL flags.
11837 11880 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11838 11881 match(Set dst (AndL src mask) );
11839 11882 effect( KILL flags );
11840 11883 ins_cost(250);
11841 11884 format %{ "MOV $dst.lo,$src.lo\n\t"
11842 11885 "XOR $dst.hi,$dst.hi\n\t" %}
11843 11886 opcode(0x33); // XOR
11844 11887 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11845 11888 ins_pipe( ialu_reg_reg_long );
11846 11889 %}
11847 11890
11848 11891 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11849 11892 predicate (UseSSE<=1);
11850 11893 match(Set dst (ConvL2D src));
11851 11894 effect( KILL cr );
11852 11895 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11853 11896 "PUSH $src.lo\n\t"
11854 11897 "FILD ST,[ESP + #0]\n\t"
11855 11898 "ADD ESP,8\n\t"
11856 11899 "FSTP_D $dst\t# D-round" %}
11857 11900 opcode(0xDF, 0x5); /* DF /5 */
11858 11901 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
11859 11902 ins_pipe( pipe_slow );
11860 11903 %}
11861 11904
11862 11905 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
11863 11906 predicate (UseSSE>=2);
11864 11907 match(Set dst (ConvL2D src));
11865 11908 effect( KILL cr );
11866 11909 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11867 11910 "PUSH $src.lo\n\t"
11868 11911 "FILD_D [ESP]\n\t"
11869 11912 "FSTP_D [ESP]\n\t"
11870 11913 "MOVSD $dst,[ESP]\n\t"
11871 11914 "ADD ESP,8" %}
11872 11915 opcode(0xDF, 0x5); /* DF /5 */
11873 11916 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
11874 11917 ins_pipe( pipe_slow );
11875 11918 %}
11876 11919
11877 11920 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
11878 11921 predicate (UseSSE>=1);
11879 11922 match(Set dst (ConvL2F src));
11880 11923 effect( KILL cr );
11881 11924 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11882 11925 "PUSH $src.lo\n\t"
11883 11926 "FILD_D [ESP]\n\t"
11884 11927 "FSTP_S [ESP]\n\t"
11885 11928 "MOVSS $dst,[ESP]\n\t"
11886 11929 "ADD ESP,8" %}
11887 11930 opcode(0xDF, 0x5); /* DF /5 */
11888 11931 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
11889 11932 ins_pipe( pipe_slow );
11890 11933 %}
11891 11934
11892 11935 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11893 11936 match(Set dst (ConvL2F src));
11894 11937 effect( KILL cr );
11895 11938 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11896 11939 "PUSH $src.lo\n\t"
11897 11940 "FILD ST,[ESP + #0]\n\t"
11898 11941 "ADD ESP,8\n\t"
11899 11942 "FSTP_S $dst\t# F-round" %}
11900 11943 opcode(0xDF, 0x5); /* DF /5 */
11901 11944 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
11902 11945 ins_pipe( pipe_slow );
11903 11946 %}
11904 11947
// Long->int truncation: on 32-bit x86 this is just a copy of the low
// register of the long pair; the high half is simply dropped.
11905 11948 instruct convL2I_reg( eRegI dst, eRegL src ) %{
11906 11949 match(Set dst (ConvL2I src));
11907 11950 effect( DEF dst, USE src );
11908 11951 format %{ "MOV $dst,$src.lo" %}
11909 11952 ins_encode(enc_CopyL_Lo(dst,src));
11910 11953 ins_pipe( ialu_reg_reg );
11911 11954 %}
11912 11955
11913 11956
11914 11957 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11915 11958 match(Set dst (MoveF2I src));
11916 11959 effect( DEF dst, USE src );
11917 11960 ins_cost(100);
11918 11961 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
11919 11962 opcode(0x8B);
11920 11963 ins_encode( OpcP, RegMem(dst,src));
11921 11964 ins_pipe( ialu_reg_mem );
11922 11965 %}
11923 11966
11924 11967 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11925 11968 predicate(UseSSE==0);
11926 11969 match(Set dst (MoveF2I src));
11927 11970 effect( DEF dst, USE src );
11928 11971
11929 11972 ins_cost(125);
11930 11973 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
11931 11974 ins_encode( Pop_Mem_Reg_F(dst, src) );
11932 11975 ins_pipe( fpu_mem_reg );
11933 11976 %}
11934 11977
11935 11978 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
11936 11979 predicate(UseSSE>=1);
11937 11980 match(Set dst (MoveF2I src));
11938 11981 effect( DEF dst, USE src );
11939 11982
11940 11983 ins_cost(95);
11941 11984 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
11942 11985 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
11943 11986 ins_pipe( pipe_slow );
11944 11987 %}
11945 11988
11946 11989 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
11947 11990 predicate(UseSSE>=2);
11948 11991 match(Set dst (MoveF2I src));
11949 11992 effect( DEF dst, USE src );
11950 11993 ins_cost(85);
11951 11994 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
11952 11995 ins_encode( MovX2I_reg(dst, src));
11953 11996 ins_pipe( pipe_slow );
11954 11997 %}
11955 11998
11956 11999 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
11957 12000 match(Set dst (MoveI2F src));
11958 12001 effect( DEF dst, USE src );
11959 12002
11960 12003 ins_cost(100);
11961 12004 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
11962 12005 opcode(0x89);
11963 12006 ins_encode( OpcPRegSS( dst, src ) );
11964 12007 ins_pipe( ialu_mem_reg );
11965 12008 %}
11966 12009
11967 12010
11968 12011 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11969 12012 predicate(UseSSE==0);
11970 12013 match(Set dst (MoveI2F src));
11971 12014 effect(DEF dst, USE src);
11972 12015
11973 12016 ins_cost(125);
11974 12017 format %{ "FLD_S $src\n\t"
11975 12018 "FSTP $dst\t# MoveI2F_stack_reg" %}
11976 12019 opcode(0xD9); /* D9 /0, FLD m32real */
11977 12020 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11978 12021 Pop_Reg_F(dst) );
11979 12022 ins_pipe( fpu_reg_mem );
11980 12023 %}
11981 12024
11982 12025 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
11983 12026 predicate(UseSSE>=1);
11984 12027 match(Set dst (MoveI2F src));
11985 12028 effect( DEF dst, USE src );
11986 12029
11987 12030 ins_cost(95);
11988 12031 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
11989 12032 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
11990 12033 ins_pipe( pipe_slow );
11991 12034 %}
11992 12035
11993 12036 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
11994 12037 predicate(UseSSE>=2);
11995 12038 match(Set dst (MoveI2F src));
11996 12039 effect( DEF dst, USE src );
11997 12040
11998 12041 ins_cost(85);
11999 12042 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12000 12043 ins_encode( MovI2X_reg(dst, src) );
12001 12044 ins_pipe( pipe_slow );
12002 12045 %}
12003 12046
12004 12047 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12005 12048 match(Set dst (MoveD2L src));
12006 12049 effect(DEF dst, USE src);
12007 12050
12008 12051 ins_cost(250);
12009 12052 format %{ "MOV $dst.lo,$src\n\t"
12010 12053 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12011 12054 opcode(0x8B, 0x8B);
12012 12055 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12013 12056 ins_pipe( ialu_mem_long_reg );
12014 12057 %}
12015 12058
12016 12059 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12017 12060 predicate(UseSSE<=1);
12018 12061 match(Set dst (MoveD2L src));
12019 12062 effect(DEF dst, USE src);
12020 12063
12021 12064 ins_cost(125);
12022 12065 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12023 12066 ins_encode( Pop_Mem_Reg_D(dst, src) );
12024 12067 ins_pipe( fpu_mem_reg );
12025 12068 %}
12026 12069
12027 12070 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12028 12071 predicate(UseSSE>=2);
12029 12072 match(Set dst (MoveD2L src));
12030 12073 effect(DEF dst, USE src);
12031 12074 ins_cost(95);
12032 12075
12033 12076 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12034 12077 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12035 12078 ins_pipe( pipe_slow );
12036 12079 %}
12037 12080
// Bit-copy an XMM double into a long register pair (no conversion):
// MOVD extracts the low 32 bits, PSHUFLW with 0x4E swaps the 16-bit lanes
// so the upper dword moves down into $tmp, and a second MOVD extracts it
// into $dst.hi. Needs a scratch XMM register, hence TEMP tmp.
12038 12081 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12039 12082 predicate(UseSSE>=2);
12040 12083 match(Set dst (MoveD2L src));
12041 12084 effect(DEF dst, USE src, TEMP tmp);
12042 12085 ins_cost(85);
12043 12086 format %{ "MOVD $dst.lo,$src\n\t"
12044 12087 "PSHUFLW $tmp,$src,0x4E\n\t"
12045 12088 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12046 12089 ins_encode( MovXD2L_reg(dst, src, tmp) );
12047 12090 ins_pipe( pipe_slow );
12048 12091 %}
12049 12092
12050 12093 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12051 12094 match(Set dst (MoveL2D src));
12052 12095 effect(DEF dst, USE src);
12053 12096
12054 12097 ins_cost(200);
12055 12098 format %{ "MOV $dst,$src.lo\n\t"
12056 12099 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12057 12100 opcode(0x89, 0x89);
12058 12101 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12059 12102 ins_pipe( ialu_mem_long_reg );
12060 12103 %}
12061 12104
12062 12105
12063 12106 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12064 12107 predicate(UseSSE<=1);
12065 12108 match(Set dst (MoveL2D src));
12066 12109 effect(DEF dst, USE src);
12067 12110 ins_cost(125);
12068 12111
12069 12112 format %{ "FLD_D $src\n\t"
12070 12113 "FSTP $dst\t# MoveL2D_stack_reg" %}
12071 12114 opcode(0xDD); /* DD /0, FLD m64real */
12072 12115 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12073 12116 Pop_Reg_D(dst) );
12074 12117 ins_pipe( fpu_reg_mem );
12075 12118 %}
12076 12119
12077 12120
12078 12121 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12079 12122 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12080 12123 match(Set dst (MoveL2D src));
12081 12124 effect(DEF dst, USE src);
12082 12125
12083 12126 ins_cost(95);
12084 12127 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12085 12128 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12086 12129 ins_pipe( pipe_slow );
12087 12130 %}
12088 12131
12089 12132 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12090 12133 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12091 12134 match(Set dst (MoveL2D src));
12092 12135 effect(DEF dst, USE src);
12093 12136
12094 12137 ins_cost(95);
12095 12138 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12096 12139 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12097 12140 ins_pipe( pipe_slow );
12098 12141 %}
12099 12142
12100 12143 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12101 12144 predicate(UseSSE>=2);
12102 12145 match(Set dst (MoveL2D src));
12103 12146 effect(TEMP dst, USE src, TEMP tmp);
12104 12147 ins_cost(85);
12105 12148 format %{ "MOVD $dst,$src.lo\n\t"
12106 12149 "MOVD $tmp,$src.hi\n\t"
12107 12150 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12108 12151 ins_encode( MovL2XD_reg(dst, src, tmp) );
12109 12152 ins_pipe( pipe_slow );
12110 12153 %}
12111 12154
12112 12155 // Replicate scalar to packed byte (1 byte) values in xmm
12113 12156 instruct Repl8B_reg(regXD dst, regXD src) %{
12114 12157 predicate(UseSSE>=2);
12115 12158 match(Set dst (Replicate8B src));
12116 12159 format %{ "MOVDQA $dst,$src\n\t"
12117 12160 "PUNPCKLBW $dst,$dst\n\t"
12118 12161 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12119 12162 ins_encode( pshufd_8x8(dst, src));
12120 12163 ins_pipe( pipe_slow );
12121 12164 %}
12122 12165
12123 12166 // Replicate scalar to packed byte (1 byte) values in xmm
12124 12167 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12125 12168 predicate(UseSSE>=2);
12126 12169 match(Set dst (Replicate8B src));
12127 12170 format %{ "MOVD $dst,$src\n\t"
12128 12171 "PUNPCKLBW $dst,$dst\n\t"
12129 12172 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12130 12173 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12131 12174 ins_pipe( pipe_slow );
12132 12175 %}
12133 12176
12134 12177 // Replicate scalar zero to packed byte (1 byte) values in xmm
12135 12178 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12136 12179 predicate(UseSSE>=2);
12137 12180 match(Set dst (Replicate8B zero));
12138 12181 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12139 12182 ins_encode( pxor(dst, dst));
12140 12183 ins_pipe( fpu_reg_reg );
12141 12184 %}
12142 12185
12143 12186 // Replicate scalar to packed short (2 byte) values in xmm
12144 12187 instruct Repl4S_reg(regXD dst, regXD src) %{
12145 12188 predicate(UseSSE>=2);
12146 12189 match(Set dst (Replicate4S src));
12147 12190 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12148 12191 ins_encode( pshufd_4x16(dst, src));
12149 12192 ins_pipe( fpu_reg_reg );
12150 12193 %}
12151 12194
12152 12195 // Replicate scalar to packed short (2 byte) values in xmm
12153 12196 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12154 12197 predicate(UseSSE>=2);
12155 12198 match(Set dst (Replicate4S src));
12156 12199 format %{ "MOVD $dst,$src\n\t"
12157 12200 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12158 12201 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12159 12202 ins_pipe( fpu_reg_reg );
12160 12203 %}
12161 12204
12162 12205 // Replicate scalar zero to packed short (2 byte) values in xmm
12163 12206 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12164 12207 predicate(UseSSE>=2);
12165 12208 match(Set dst (Replicate4S zero));
12166 12209 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12167 12210 ins_encode( pxor(dst, dst));
12168 12211 ins_pipe( fpu_reg_reg );
12169 12212 %}
12170 12213
12171 12214 // Replicate scalar to packed char (2 byte) values in xmm
12172 12215 instruct Repl4C_reg(regXD dst, regXD src) %{
12173 12216 predicate(UseSSE>=2);
12174 12217 match(Set dst (Replicate4C src));
12175 12218 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12176 12219 ins_encode( pshufd_4x16(dst, src));
12177 12220 ins_pipe( fpu_reg_reg );
12178 12221 %}
12179 12222
12180 12223 // Replicate scalar to packed char (2 byte) values in xmm
12181 12224 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12182 12225 predicate(UseSSE>=2);
12183 12226 match(Set dst (Replicate4C src));
12184 12227 format %{ "MOVD $dst,$src\n\t"
12185 12228 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12186 12229 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12187 12230 ins_pipe( fpu_reg_reg );
12188 12231 %}
12189 12232
12190 12233 // Replicate scalar zero to packed char (2 byte) values in xmm
12191 12234 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12192 12235 predicate(UseSSE>=2);
12193 12236 match(Set dst (Replicate4C zero));
12194 12237 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12195 12238 ins_encode( pxor(dst, dst));
12196 12239 ins_pipe( fpu_reg_reg );
12197 12240 %}
12198 12241
12199 12242 // Replicate scalar to packed integer (4 byte) values in xmm
12200 12243 instruct Repl2I_reg(regXD dst, regXD src) %{
12201 12244 predicate(UseSSE>=2);
12202 12245 match(Set dst (Replicate2I src));
12203 12246 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12204 12247 ins_encode( pshufd(dst, src, 0x00));
12205 12248 ins_pipe( fpu_reg_reg );
12206 12249 %}
12207 12250
12208 12251 // Replicate scalar to packed integer (4 byte) values in xmm
12209 12252 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12210 12253 predicate(UseSSE>=2);
12211 12254 match(Set dst (Replicate2I src));
12212 12255 format %{ "MOVD $dst,$src\n\t"
12213 12256 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12214 12257 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12215 12258 ins_pipe( fpu_reg_reg );
12216 12259 %}
12217 12260
12218 12261 // Replicate scalar zero to packed integer (4 byte) values in xmm
12219 12262 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12220 12263 predicate(UseSSE>=2);
12221 12264 match(Set dst (Replicate2I zero));
12222 12265 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12223 12266 ins_encode( pxor(dst, dst));
12224 12267 ins_pipe( fpu_reg_reg );
12225 12268 %}
12226 12269
12227 12270 // Replicate scalar to packed single precision floating point values in xmm
12228 12271 instruct Repl2F_reg(regXD dst, regXD src) %{
12229 12272 predicate(UseSSE>=2);
12230 12273 match(Set dst (Replicate2F src));
12231 12274 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12232 12275 ins_encode( pshufd(dst, src, 0xe0));
12233 12276 ins_pipe( fpu_reg_reg );
12234 12277 %}
12235 12278
12236 12279 // Replicate scalar to packed single precision floating point values in xmm
12237 12280 instruct Repl2F_regX(regXD dst, regX src) %{
12238 12281 predicate(UseSSE>=2);
12239 12282 match(Set dst (Replicate2F src));
12240 12283 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12241 12284 ins_encode( pshufd(dst, src, 0xe0));
12242 12285 ins_pipe( fpu_reg_reg );
12243 12286 %}
12244 12287
12245 12288 // Replicate scalar to packed single precision floating point values in xmm
12246 12289 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12247 12290 predicate(UseSSE>=2);
12248 12291 match(Set dst (Replicate2F zero));
12249 12292 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12250 12293 ins_encode( pxor(dst, dst));
12251 12294 ins_pipe( fpu_reg_reg );
12252 12295 %}
12253 12296
12254 12297 // =======================================================================
12255 12298 // fast clearing of an array
// Fast array clear via REP STOS: EAX is zeroed (XOR 33 /r), then F3 AB
// stores EAX into [EDI] ECX times, advancing EDI. The count is first
// doubled (SHL ECX,1, encoded D1 /4) -- each STOS iteration stores only
// 4 bytes, so the incoming count is presumably in 8-byte units; the
// format's "doublewords to words" wording uses HotSpot's word naming.
// TODO confirm the unit of cnt against ClearArray's contract.
// All of cnt (ECX), base (EDI), zero (EAX) and flags are destroyed.
12256 12299 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12257 12300 match(Set dummy (ClearArray cnt base));
12258 12301 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12259 12302 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12260 12303 "XOR EAX,EAX\n\t"
12261 12304 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12262 12305 opcode(0,0x4);
12263 12306 ins_encode( Opcode(0xD1), RegOpc(ECX),
12264 12307 OpcRegReg(0x33,EAX,EAX),
12265 12308 Opcode(0xF3), Opcode(0xAB) );
12266 12309 ins_pipe( pipe_slow );
12267 12310 %}
12268 12311
// StrComp intrinsic: delegates entirely to
// MacroAssembler::string_compare, which compares (str1,cnt1) against
// (str2,cnt2) and leaves the result in EAX. The operand constraints pin
// the fixed registers that routine expects; both string/count pairs are
// consumed (USE_KILL) and two XMM temps plus flags are clobbered.
12269 12312 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12270 12313 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12271 12314 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12272 12315 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12273 12316
12274 12317 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12275 12318 ins_encode %{
12276 12319 __ string_compare($str1$$Register, $str2$$Register,
12277 12320 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12278 12321 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12279 12322 %}
12280 12323 ins_pipe( pipe_slow );
12281 12324 %}
12282 12325
12283 12326 // fast string equals
// Delegates to MacroAssembler::char_arrays_equals; the leading 'false'
// argument selects the string variant (array_equals below passes 'true').
// tmp1/tmp2 are XMM scratch, tmp3 is a GP scratch; inputs are destroyed.
12284 12327 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12285 12328 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12286 12329 match(Set result (StrEquals (Binary str1 str2) cnt));
12287 12330 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12288 12331
12289 12332 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12290 12333 ins_encode %{
12291 12334 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12292 12335 $cnt$$Register, $result$$Register, $tmp3$$Register,
12293 12336 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12294 12337 %}
12295 12338 ins_pipe( pipe_slow );
12296 12339 %}
12297 12340
// Intrinsic String.indexOf: only selected when the CPU supports SSE4.2
// (the helper presumably uses PCMPESTRI-style string instructions — the
// predicate gates on UseSSE42Intrinsics). Inputs are destroyed.
12298 12341 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12299 12342 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12300 12343 predicate(UseSSE42Intrinsics);
12301 12344 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12302 12345 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12303 12346
12304 12347 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12305 12348 ins_encode %{
12306 12349 __ string_indexof($str1$$Register, $str2$$Register,
12307 12350 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12308 12351 $tmp1$$XMMRegister, $tmp2$$Register);
12309 12352 %}
12310 12353 ins_pipe( pipe_slow );
12311 12354 %}
12312 12355
12313 12356 // fast array equals
// Same helper as string_equals, but the leading 'true' selects the
// char-array variant. Note the length operand is tmp3 (loaded by the
// helper from the arrays), not an ideal-graph input.
12314 12357 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12315 12358 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12316 12359 %{
12317 12360 match(Set result (AryEq ary1 ary2));
12318 12361 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12319 12362 //ins_cost(300);
12320 12363
12321 12364 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12322 12365 ins_encode %{
12323 12366 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12324 12367 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12325 12368 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12326 12369 %}
12327 12370 ins_pipe( pipe_slow );
12328 12371 %}
12329 12372
12330 12373 //----------Control Flow Instructions------------------------------------------
12331 12374 // Signed compare Instructions
// CMP reg,reg — sets eFlagsReg for a signed integer compare (3B /r).
12332 12375 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12333 12376 match(Set cr (CmpI op1 op2));
12334 12377 effect( DEF cr, USE op1, USE op2 );
12335 12378 format %{ "CMP $op1,$op2" %}
12336 12379 opcode(0x3B); /* Opcode 3B /r */
12337 12380 ins_encode( OpcP, RegReg( op1, op2) );
12338 12381 ins_pipe( ialu_cr_reg_reg );
12339 12382 %}
12340 12383
// CMP reg,imm — OpcSErm/Con8or32 choose the sign-extended 8-bit immediate
// form when the constant fits, otherwise the full 32-bit form (81 /7).
12341 12384 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12342 12385 match(Set cr (CmpI op1 op2));
12343 12386 effect( DEF cr, USE op1 );
12344 12387 format %{ "CMP $op1,$op2" %}
12345 12388 opcode(0x81,0x07); /* Opcode 81 /7 */
12346 12389 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12347 12390 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12348 12391 ins_pipe( ialu_cr_reg_imm );
12349 12392 %}
12350 12393
12351 12394 // Cisc-spilled version of cmpI_eReg
// CMP reg,mem — folds the load into the compare; higher cost reflects
// the memory operand.
12352 12395 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12353 12396 match(Set cr (CmpI op1 (LoadI op2)));
12354 12397
12355 12398 format %{ "CMP $op1,$op2" %}
12356 12399 ins_cost(500);
12357 12400 opcode(0x3B); /* Opcode 3B /r */
12358 12401 ins_encode( OpcP, RegMem( op1, op2) );
12359 12402 ins_pipe( ialu_cr_reg_mem );
12360 12403 %}
12361 12404
// Compare against zero: TEST src,src (85 /r) is shorter than CMP src,0
// and sets the same Z/S flags.
12362 12405 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12363 12406 match(Set cr (CmpI src zero));
12364 12407 effect( DEF cr, USE src );
12365 12408
12366 12409 format %{ "TEST $src,$src" %}
12367 12410 opcode(0x85);
12368 12411 ins_encode( OpcP, RegReg( src, src ) );
12369 12412 ins_pipe( ialu_cr_reg_imm );
12370 12413 %}
12371 12414
// (AndI src con) == 0 folds to TEST src,imm32 (F7 /0) — no AND result
// register is needed, only the flags.
12372 12415 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12373 12416 match(Set cr (CmpI (AndI src con) zero));
12374 12417
12375 12418 format %{ "TEST $src,$con" %}
12376 12419 opcode(0xF7,0x00);
12377 12420 ins_encode( OpcP, RegOpc(src), Con32(con) );
12378 12421 ins_pipe( ialu_cr_reg_imm );
12379 12422 %}
12380 12423
// (AndI src mem) == 0 folds to TEST reg,mem.
12381 12424 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12382 12425 match(Set cr (CmpI (AndI src mem) zero));
12383 12426
12384 12427 format %{ "TEST $src,$mem" %}
12385 12428 opcode(0x85);
12386 12429 ins_encode( OpcP, RegMem( src, mem ) );
12387 12430 ins_pipe( ialu_cr_reg_mem );
12388 12431 %}
12389 12432
12390 12433 // Unsigned compare Instructions; really, same as signed except they
12391 12434 // produce an eFlagsRegU instead of eFlagsReg.
// The encodings are identical to the signed forms above (CMP sets both
// signed and unsigned condition flags); only the flags-register class
// differs so the matcher pairs them with unsigned branches.
12392 12435 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12393 12436 match(Set cr (CmpU op1 op2));
12394 12437
12395 12438 format %{ "CMPu $op1,$op2" %}
12396 12439 opcode(0x3B); /* Opcode 3B /r */
12397 12440 ins_encode( OpcP, RegReg( op1, op2) );
12398 12441 ins_pipe( ialu_cr_reg_reg );
12399 12442 %}
12400 12443
12401 12444 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12402 12445 match(Set cr (CmpU op1 op2));
12403 12446
12404 12447 format %{ "CMPu $op1,$op2" %}
12405 12448 opcode(0x81,0x07); /* Opcode 81 /7 */
12406 12449 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12407 12450 ins_pipe( ialu_cr_reg_imm );
12408 12451 %}
12409 12452
12410 12453 // // Cisc-spilled version of cmpU_eReg
12411 12454 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12412 12455 match(Set cr (CmpU op1 (LoadI op2)));
12413 12456
12414 12457 format %{ "CMPu $op1,$op2" %}
12415 12458 ins_cost(500);
12416 12459 opcode(0x3B); /* Opcode 3B /r */
12417 12460 ins_encode( OpcP, RegMem( op1, op2) );
12418 12461 ins_pipe( ialu_cr_reg_mem );
12419 12462 %}
12420 12463
12421 12464 // // Cisc-spilled version of cmpU_eReg
12422 12465 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12423 12466 // match(Set cr (CmpU (LoadI op1) op2));
12424 12467 //
12425 12468 // format %{ "CMPu $op1,$op2" %}
12426 12469 // ins_cost(500);
12427 12470 // opcode(0x39); /* Opcode 39 /r */
12428 12471 // ins_encode( OpcP, RegMem( op1, op2) );
12429 12472 //%}
12430 12473
// Unsigned compare to zero — TEST src,src, same as the signed testI_reg.
12431 12474 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12432 12475 match(Set cr (CmpU src zero));
12433 12476
12434 12477 format %{ "TESTu $src,$src" %}
12435 12478 opcode(0x85);
12436 12479 ins_encode( OpcP, RegReg( src, src ) );
12437 12480 ins_pipe( ialu_cr_reg_imm );
12438 12481 %}
12439 12482
12440 12483 // Unsigned pointer compare Instructions
// Pointer compares are unsigned (addresses have no sign); encodings are
// the same CMP forms used for integers above.
12441 12484 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12442 12485 match(Set cr (CmpP op1 op2));
12443 12486
12444 12487 format %{ "CMPu $op1,$op2" %}
12445 12488 opcode(0x3B); /* Opcode 3B /r */
12446 12489 ins_encode( OpcP, RegReg( op1, op2) );
12447 12490 ins_pipe( ialu_cr_reg_reg );
12448 12491 %}
12449 12492
12450 12493 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12451 12494 match(Set cr (CmpP op1 op2));
12452 12495
12453 12496 format %{ "CMPu $op1,$op2" %}
12454 12497 opcode(0x81,0x07); /* Opcode 81 /7 */
12455 12498 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12456 12499 ins_pipe( ialu_cr_reg_imm );
12457 12500 %}
12458 12501
12459 12502 // // Cisc-spilled version of cmpP_eReg
12460 12503 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12461 12504 match(Set cr (CmpP op1 (LoadP op2)));
12462 12505
12463 12506 format %{ "CMPu $op1,$op2" %}
12464 12507 ins_cost(500);
12465 12508 opcode(0x3B); /* Opcode 3B /r */
12466 12509 ins_encode( OpcP, RegMem( op1, op2) );
12467 12510 ins_pipe( ialu_cr_reg_mem );
12468 12511 %}
12469 12512
12470 12513 // // Cisc-spilled version of cmpP_eReg
12471 12514 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12472 12515 // match(Set cr (CmpP (LoadP op1) op2));
12473 12516 //
12474 12517 // format %{ "CMPu $op1,$op2" %}
12475 12518 // ins_cost(500);
12476 12519 // opcode(0x39); /* Opcode 39 /r */
12477 12520 // ins_encode( OpcP, RegMem( op1, op2) );
12478 12521 //%}
12479 12522
12480 12523 // Compare raw pointer (used in out-of-heap check).
12481 12524 // Only works because non-oop pointers must be raw pointers
12482 12525 // and raw pointers have no anti-dependencies.
// The predicate inspects the loaded value's type on the ideal node (n) to
// restrict this match to non-oop (raw) pointers; cheaper than
// compP_eReg_mem since no anti-dependence bookkeeping is needed.
12483 12526 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12484 12527 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12485 12528 match(Set cr (CmpP op1 (LoadP op2)));
12486 12529
12487 12530 format %{ "CMPu $op1,$op2" %}
12488 12531 opcode(0x3B); /* Opcode 3B /r */
12489 12532 ins_encode( OpcP, RegMem( op1, op2) );
12490 12533 ins_pipe( ialu_cr_reg_mem );
12491 12534 %}
12492 12535
12493 12536 //
12494 12537 // This will generate a signed flags result. This should be ok
12495 12538 // since any compare to a zero should be eq/neq.
12496 12539 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12497 12540 match(Set cr (CmpP src zero));
12498 12541
12499 12542 format %{ "TEST $src,$src" %}
12500 12543 opcode(0x85);
12501 12544 ins_encode( OpcP, RegReg( src, src ) );
12502 12545 ins_pipe( ialu_cr_reg_imm );
12503 12546 %}
12504 12547
12505 12548 // Cisc-spilled version of testP_reg
12506 12549 // This will generate a signed flags result. This should be ok
12507 12550 // since any compare to a zero should be eq/neq.
// TEST mem,0xFFFFFFFF: AND against all-ones sets Z iff the loaded pointer
// is null, without needing a register for the pointer.
12508 12551 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12509 12552 match(Set cr (CmpP (LoadP op) zero));
12510 12553
12511 12554 format %{ "TEST $op,0xFFFFFFFF" %}
12512 12555 ins_cost(500);
12513 12556 opcode(0xF7); /* Opcode F7 /0 */
12514 12557 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12515 12558 ins_pipe( ialu_cr_reg_imm );
12516 12559 %}
12517 12560
12518 12561 // Yanked all unsigned pointer compare operations.
12519 12562 // Pointer compares are done with CmpP which is already unsigned.
12520 12563
12521 12564 //----------Max and Min--------------------------------------------------------
12522 12565 // Min Instructions
12523 12566 ////
12524 12567 // *** Min and Max using the conditional move are slower than the
12525 12568 // *** branch version on a Pentium III.
12526 12569 // // Conditional move for min
12527 12570 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12528 12571 // effect( USE_DEF op2, USE op1, USE cr );
12529 12572 // format %{ "CMOVlt $op2,$op1\t! min" %}
12530 12573 // opcode(0x4C,0x0F);
12531 12574 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12532 12575 // ins_pipe( pipe_cmov_reg );
12533 12576 //%}
12534 12577 //
12535 12578 //// Min Register with Register (P6 version)
12536 12579 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12537 12580 // predicate(VM_Version::supports_cmov() );
12538 12581 // match(Set op2 (MinI op1 op2));
12539 12582 // ins_cost(200);
12540 12583 // expand %{
12541 12584 // eFlagsReg cr;
12542 12585 // compI_eReg(cr,op1,op2);
12543 12586 // cmovI_reg_lt(op2,op1,cr);
12544 12587 // %}
12545 12588 //%}
12546 12589
12547 12590 // Min Register with Register (generic version)
// Branch-based MIN: the actual code comes from the min_enc encoding class;
// flags are clobbered. NOTE(review): opcode(0xCC) looks like a placeholder
// (min_enc emits the real bytes) — confirm against the encode block.
12548 12591 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12549 12592 match(Set dst (MinI dst src));
12550 12593 effect(KILL flags);
12551 12594 ins_cost(300);
12552 12595
12553 12596 format %{ "MIN $dst,$src" %}
12554 12597 opcode(0xCC);
12555 12598 ins_encode( min_enc(dst,src) );
12556 12599 ins_pipe( pipe_slow );
12557 12600 %}
12558 12601
12559 12602 // Max Register with Register
12560 12603 // *** Min and Max using the conditional move are slower than the
12561 12604 // *** branch version on a Pentium III.
12562 12605 // // Conditional move for max
12563 12606 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12564 12607 // effect( USE_DEF op2, USE op1, USE cr );
12565 12608 // format %{ "CMOVgt $op2,$op1\t! max" %}
12566 12609 // opcode(0x4F,0x0F);
12567 12610 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12568 12611 // ins_pipe( pipe_cmov_reg );
12569 12612 //%}
12570 12613 //
12571 12614 // // Max Register with Register (P6 version)
12572 12615 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12573 12616 // predicate(VM_Version::supports_cmov() );
12574 12617 // match(Set op2 (MaxI op1 op2));
12575 12618 // ins_cost(200);
12576 12619 // expand %{
12577 12620 // eFlagsReg cr;
12578 12621 // compI_eReg(cr,op1,op2);
12579 12622 // cmovI_reg_gt(op2,op1,cr);
12580 12623 // %}
12581 12624 //%}
12582 12625
12583 12626 // Max Register with Register (generic version)
// Branch-based MAX, mirror of minI_eReg via the max_enc encoding class.
12584 12627 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12585 12628 match(Set dst (MaxI dst src));
12586 12629 effect(KILL flags);
12587 12630 ins_cost(300);
12588 12631
12589 12632 format %{ "MAX $dst,$src" %}
12590 12633 opcode(0xCC);
12591 12634 ins_encode( max_enc(dst,src) );
12592 12635 ins_pipe( pipe_slow );
12593 12636 %}
12594 12637
12595 12638 // ============================================================================
12596 12639 // Branch Instructions
12597 12640 // Jump Table
// Computed jump for tableswitch: materializes a label table in the
// constant area (address_table_constant over _index2label) and jumps
// through [table_base + switch_val], scaled times_1 — so switch_val is
// expected to be a pre-scaled byte offset into the table.
12598 12641 instruct jumpXtnd(eRegI switch_val) %{
12599 12642 match(Jump switch_val);
12600 12643 ins_cost(350);
12601 12644
12602 12645 format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12603 12646
12604 12647 ins_encode %{
12605 12648 address table_base = __ address_table_constant(_index2label);
12606 12649
12607 12650 // Jump to Address(table_base + switch_reg)
12608 12651 InternalAddress table(table_base);
12609 12652 Address index(noreg, $switch_val$$Register, Address::times_1);
12610 12653 __ jump(ArrayAddress(table, index));
12611 12654 %}
12612 12655 ins_pc_relative(1);
12613 12656 ins_pipe(pipe_jmp);
12614 12657 %}
12615 12658
12616 12659 // Jump Direct - Label defines a relative address from JMP+1
// E9 rel32: 5-byte unconditional jump. The long-offset default; the
// short (2-byte) variants further down replace these when the target is
// within rel8 range (see ins_short_branch machinery).
12617 12660 instruct jmpDir(label labl) %{
12618 12661 match(Goto);
12619 12662 effect(USE labl);
12620 12663
12621 12664 ins_cost(300);
12622 12665 format %{ "JMP $labl" %}
12623 12666 size(5);
12624 12667 opcode(0xE9);
12625 12668 ins_encode( OpcP, Lbl( labl ) );
12626 12669 ins_pipe( pipe_jmp );
12627 12670 ins_pc_relative(1);
12628 12671 %}
12629 12672
12630 12673 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// 0F 8x rel32: 6-byte conditional jump; condition code comes from cop.
12631 12674 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12632 12675 match(If cop cr);
12633 12676 effect(USE labl);
12634 12677
12635 12678 ins_cost(300);
12636 12679 format %{ "J$cop $labl" %}
12637 12680 size(6);
12638 12681 opcode(0x0F, 0x80);
12639 12682 ins_encode( Jcc( cop, labl) );
12640 12683 ins_pipe( pipe_jcc );
12641 12684 ins_pc_relative(1);
12642 12685 %}
12643 12686
12644 12687 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12645 12688 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12646 12689 match(CountedLoopEnd cop cr);
12647 12690 effect(USE labl);
12648 12691
12649 12692 ins_cost(300);
12650 12693 format %{ "J$cop $labl\t# Loop end" %}
12651 12694 size(6);
12652 12695 opcode(0x0F, 0x80);
12653 12696 ins_encode( Jcc( cop, labl) );
12654 12697 ins_pipe( pipe_jcc );
12655 12698 ins_pc_relative(1);
12656 12699 %}
12657 12700
12658 12701 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd (eFlagsRegU / cmpOpU).
12659 12702 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12660 12703 match(CountedLoopEnd cop cmp);
12661 12704 effect(USE labl);
12662 12705
12663 12706 ins_cost(300);
12664 12707 format %{ "J$cop,u $labl\t# Loop end" %}
12665 12708 size(6);
12666 12709 opcode(0x0F, 0x80);
12667 12710 ins_encode( Jcc( cop, labl) );
12668 12711 ins_pipe( pipe_jcc );
12669 12712 ins_pc_relative(1);
12670 12713 %}
12671 12714
// UCF (unordered-compare-flags) variant; slightly cheaper cost so the
// matcher prefers it when the flags class permits.
12672 12715 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12673 12716 match(CountedLoopEnd cop cmp);
12674 12717 effect(USE labl);
12675 12718
12676 12719 ins_cost(200);
12677 12720 format %{ "J$cop,u $labl\t# Loop end" %}
12678 12721 size(6);
12679 12722 opcode(0x0F, 0x80);
12680 12723 ins_encode( Jcc( cop, labl) );
12681 12724 ins_pipe( pipe_jcc );
12682 12725 ins_pc_relative(1);
12683 12726 %}
12684 12727
12685 12728 // Jump Direct Conditional - using unsigned comparison
12686 12729 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12687 12730 match(If cop cmp);
12688 12731 effect(USE labl);
12689 12732
12690 12733 ins_cost(300);
12691 12734 format %{ "J$cop,u $labl" %}
12692 12735 size(6);
12693 12736 opcode(0x0F, 0x80);
12694 12737 ins_encode(Jcc(cop, labl));
12695 12738 ins_pipe(pipe_jcc);
12696 12739 ins_pc_relative(1);
12697 12740 %}
12698 12741
12699 12742 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12700 12743 match(If cop cmp);
12701 12744 effect(USE labl);
12702 12745
12703 12746 ins_cost(200);
12704 12747 format %{ "J$cop,u $labl" %}
12705 12748 size(6);
12706 12749 opcode(0x0F, 0x80);
12707 12750 ins_encode(Jcc(cop, labl));
12708 12751 ins_pipe(pipe_jcc);
12709 12752 ins_pc_relative(1);
12710 12753 %}
12711 12754
// Branch on float compare flags where PF must be consulted: an unordered
// result sets the parity flag. For NE, an unordered compare must branch
// to the target (JP,u $labl then Jcc); for EQ, unordered must NOT branch,
// so JP skips over the Jcc (parity_disp = 6, the size of one long Jcc).
// Emits two 6-byte Jcc instructions — hence size(12).
12712 12755 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12713 12756 match(If cop cmp);
12714 12757 effect(USE labl);
12715 12758
12716 12759 ins_cost(200);
12717 12760 format %{ $$template
12718 12761 if ($cop$$cmpcode == Assembler::notEqual) {
12719 12762 $$emit$$"JP,u $labl\n\t"
12720 12763 $$emit$$"J$cop,u $labl"
12721 12764 } else {
12722 12765 $$emit$$"JP,u done\n\t"
12723 12766 $$emit$$"J$cop,u $labl\n\t"
12724 12767 $$emit$$"done:"
12725 12768 }
12726 12769 %}
12727 12770 size(12);
12728 12771 opcode(0x0F, 0x80);
12729 12772 ins_encode %{
12730 12773 Label* l = $labl$$label;
// NOTE(review): local 'ok' is assigned but never read — dead code,
// candidate for removal in a cleanup change.
12731 12774 $$$emit8$primary;
12732 12775 emit_cc(cbuf, $secondary, Assembler::parity);
12733 12776 int parity_disp = -1;
12734 12777 bool ok = false;
12735 12778 if ($cop$$cmpcode == Assembler::notEqual) {
12736 12779 // the two jumps are 6 bytes apart, so their displacements to the shared target differ by 6 as well
12737 12780 parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12738 12781 } else if ($cop$$cmpcode == Assembler::equal) {
12739 12782 parity_disp = 6;
12740 12783 ok = true;
12741 12784 } else {
12742 12785 ShouldNotReachHere();
12743 12786 }
12744 12787 emit_d32(cbuf, parity_disp);
12745 12788 $$$emit8$primary;
12746 12789 emit_cc(cbuf, $secondary, $cop$$cmpcode);
12747 12790 int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
12748 12791 emit_d32(cbuf, disp);
12749 12792 %}
12750 12793 ins_pipe(pipe_jcc);
12751 12794 ins_pc_relative(1);
12752 12795 %}
12753 12796
12754 12797 // ============================================================================
12755 12798 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12756 12799 // array for an instance of the superklass. Set a hidden internal cache on a
12757 12800 // hit (cache is checked with exposed code in gen_subtype_check()). Return
12758 12801 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Both variants share enc_PartialSubtypeCheck; the opcode byte below acts
// as a flag telling the encoder whether to XOR EDI to produce a register
// result (0x1 here) or only set flags (0x0 in the _vs_Zero form).
12759 12802 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12760 12803 match(Set result (PartialSubtypeCheck sub super));
12761 12804 effect( KILL rcx, KILL cr );
12762 12805
12763 12806 ins_cost(1100); // slightly larger than the next version
12764 12807 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12765 12808 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12766 12809 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12767 12810 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12768 12811 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
12769 12812 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12770 12813 "XOR $result,$result\t\t Hit: EDI zero\n\t"
12771 12814 "miss:\t" %}
12772 12815
12773 12816 opcode(0x1); // Force a XOR of EDI
12774 12817 ins_encode( enc_PartialSubtypeCheck() );
12775 12818 ins_pipe( pipe_slow );
12776 12819 %}
12777 12820
// Flags-only variant: used when the result feeds directly into a
// compare-with-null, so the register result is not needed (cheaper cost).
12778 12821 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12779 12822 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12780 12823 effect( KILL rcx, KILL result );
12781 12824
12782 12825 ins_cost(1000);
12783 12826 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12784 12827 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12785 12828 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12786 12829 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12787 12830 "JNE,s miss\t\t# Missed: flags NZ\n\t"
12788 12831 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12789 12832 "miss:\t" %}
12790 12833
12791 12834 opcode(0x0); // No need to XOR EDI
12792 12835 ins_encode( enc_PartialSubtypeCheck() );
12793 12836 ins_pipe( pipe_slow );
12794 12837 %}
12795 12838
12796 12839 // ============================================================================
12797 12840 // Branch Instructions -- short offset versions
12798 12841 //
12799 12842 // These instructions are used to replace jumps of a long offset (the default
12800 12843 // match) with jumps of a shorter offset. These instructions are all tagged
12801 12844 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12802 12845 // match rules in general matching. Instead, the ADLC generates a conversion
12803 12846 // method in the MachNode which can be used to do in-place replacement of the
12804 12847 // long variant with the shorter variant. The compiler will determine if a
12805 12848 // branch can be taken by the is_short_branch_offset() predicate in the machine
12806 12849 // specific code section of the file.
12807 12850
12808 12851 // Jump Direct - Label defines a relative address from JMP+1
// EB rel8: 2-byte short jump, replaces the 5-byte jmpDir when in range.
12809 12852 instruct jmpDir_short(label labl) %{
12810 12853 match(Goto);
12811 12854 effect(USE labl);
12812 12855
12813 12856 ins_cost(300);
12814 12857 format %{ "JMP,s $labl" %}
12815 12858 size(2);
12816 12859 opcode(0xEB);
12817 12860 ins_encode( OpcP, LblShort( labl ) );
12818 12861 ins_pipe( pipe_jmp );
12819 12862 ins_pc_relative(1);
12820 12863 ins_short_branch(1);
12821 12864 %}
12822 12865
12823 12866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// 7x rel8: 2-byte short conditional jump, replaces the 6-byte jmpCon.
12824 12867 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12825 12868 match(If cop cr);
12826 12869 effect(USE labl);
12827 12870
12828 12871 ins_cost(300);
12829 12872 format %{ "J$cop,s $labl" %}
12830 12873 size(2);
12831 12874 opcode(0x70);
12832 12875 ins_encode( JccShort( cop, labl) );
12833 12876 ins_pipe( pipe_jcc );
12834 12877 ins_pc_relative(1);
12835 12878 ins_short_branch(1);
12836 12879 %}
12837 12880
12838 12881 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12839 12882 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12840 12883 match(CountedLoopEnd cop cr);
12841 12884 effect(USE labl);
12842 12885
12843 12886 ins_cost(300);
12844 12887 format %{ "J$cop,s $labl\t# Loop end" %}
12845 12888 size(2);
12846 12889 opcode(0x70);
12847 12890 ins_encode( JccShort( cop, labl) );
12848 12891 ins_pipe( pipe_jcc );
12849 12892 ins_pc_relative(1);
12850 12893 ins_short_branch(1);
12851 12894 %}
12852 12895
12853 12896 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12854 12897 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12855 12898 match(CountedLoopEnd cop cmp);
12856 12899 effect(USE labl);
12857 12900
12858 12901 ins_cost(300);
12859 12902 format %{ "J$cop,us $labl\t# Loop end" %}
12860 12903 size(2);
12861 12904 opcode(0x70);
12862 12905 ins_encode( JccShort( cop, labl) );
12863 12906 ins_pipe( pipe_jcc );
12864 12907 ins_pc_relative(1);
12865 12908 ins_short_branch(1);
12866 12909 %}
12867 12910
12868 12911 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12869 12912 match(CountedLoopEnd cop cmp);
12870 12913 effect(USE labl);
12871 12914
12872 12915 ins_cost(300);
12873 12916 format %{ "J$cop,us $labl\t# Loop end" %}
12874 12917 size(2);
12875 12918 opcode(0x70);
12876 12919 ins_encode( JccShort( cop, labl) );
12877 12920 ins_pipe( pipe_jcc );
12878 12921 ins_pc_relative(1);
12879 12922 ins_short_branch(1);
12880 12923 %}
12881 12924
12882 12925 // Jump Direct Conditional - using unsigned comparison
12883 12926 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12884 12927 match(If cop cmp);
12885 12928 effect(USE labl);
12886 12929
12887 12930 ins_cost(300);
12888 12931 format %{ "J$cop,us $labl" %}
12889 12932 size(2);
12890 12933 opcode(0x70);
12891 12934 ins_encode( JccShort( cop, labl) );
12892 12935 ins_pipe( pipe_jcc );
12893 12936 ins_pc_relative(1);
12894 12937 ins_short_branch(1);
12895 12938 %}
12896 12939
12897 12940 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12898 12941 match(If cop cmp);
12899 12942 effect(USE labl);
12900 12943
12901 12944 ins_cost(300);
12902 12945 format %{ "J$cop,us $labl" %}
12903 12946 size(2);
12904 12947 opcode(0x70);
12905 12948 ins_encode( JccShort( cop, labl) );
12906 12949 ins_pipe( pipe_jcc );
12907 12950 ins_pc_relative(1);
12908 12951 ins_short_branch(1);
12909 12952 %}
12910 12953
// Short (rel8) version of jmpConUCF2: two 2-byte Jcc's — size(4). Same
// parity-flag logic as the long form: NE branches to the label on
// unordered; EQ skips the second jump (parity_disp = 2, one short Jcc).
// The trailing asserts verify both displacements fit in a signed byte.
12911 12954 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12912 12955 match(If cop cmp);
12913 12956 effect(USE labl);
12914 12957
12915 12958 ins_cost(300);
12916 12959 format %{ $$template
12917 12960 if ($cop$$cmpcode == Assembler::notEqual) {
12918 12961 $$emit$$"JP,u,s $labl\n\t"
12919 12962 $$emit$$"J$cop,u,s $labl"
12920 12963 } else {
12921 12964 $$emit$$"JP,u,s done\n\t"
12922 12965 $$emit$$"J$cop,u,s $labl\n\t"
12923 12966 $$emit$$"done:"
12924 12967 }
12925 12968 %}
12926 12969 size(4);
12927 12970 opcode(0x70);
12928 12971 ins_encode %{
12929 12972 Label* l = $labl$$label;
12930 12973 emit_cc(cbuf, $primary, Assembler::parity);
12931 12974 int parity_disp = -1;
12932 12975 if ($cop$$cmpcode == Assembler::notEqual) {
12933 12976 parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12934 12977 } else if ($cop$$cmpcode == Assembler::equal) {
12935 12978 parity_disp = 2;
12936 12979 } else {
12937 12980 ShouldNotReachHere();
12938 12981 }
12939 12982 emit_d8(cbuf, parity_disp);
12940 12983 emit_cc(cbuf, $primary, $cop$$cmpcode);
12941 12984 int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
12942 12985 emit_d8(cbuf, disp);
12943 12986 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
12944 12987 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
12945 12988 %}
12946 12989 ins_pipe(pipe_jcc);
12947 12990 ins_pc_relative(1);
12948 12991 ins_short_branch(1);
12949 12992 %}
12950 12993
12951 12994 // ============================================================================
12952 12995 // Long Compare
12953 12996 //
12954 12997 // Currently we hold longs in 2 registers. Comparing such values efficiently
12955 12998 // is tricky. The flavor of compare used depends on whether we are testing
12956 12999 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
12957 13000 // The GE test is the negated LT test. The LE test can be had by commuting
12958 13001 // the operands (yielding a GE test) and then negating; negate again for the
12959 13002 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
12960 13003 // NE test is negated from that.
12961 13004
12962 13005 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12963 13006 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
12964 13007 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
12965 13008 // are collapsed internally in the ADLC's dfa-gen code. The match for
12966 13009 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12967 13010 // foo match ends up with the wrong leaf. One fix is to not match both
12968 13011 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
12969 13012 // both forms beat the trinary form of long-compare and both are very useful
12970 13013 // on Intel which has so few registers.
12971 13014
12972 13015 // Manifest a CmpL result in an integer register. Very painful.
12973 13016 // This is the test to avoid.
// Produces -1/0/+1 in dst. High halves are compared signed (jcc less /
// greater); if they are equal, the low halves are compared UNSIGNED
// (jcc below) — correct 64-bit semantics for a two-register long.
12974 13017 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12975 13018 match(Set dst (CmpL3 src1 src2));
12976 13019 effect( KILL flags );
12977 13020 ins_cost(1000);
12978 13021 format %{ "XOR $dst,$dst\n\t"
12979 13022 "CMP $src1.hi,$src2.hi\n\t"
12980 13023 "JLT,s m_one\n\t"
12981 13024 "JGT,s p_one\n\t"
12982 13025 "CMP $src1.lo,$src2.lo\n\t"
12983 13026 "JB,s m_one\n\t"
12984 13027 "JEQ,s done\n"
12985 13028 "p_one:\tINC $dst\n\t"
12986 13029 "JMP,s done\n"
12987 13030 "m_one:\tDEC $dst\n"
12988 13031 "done:" %}
12989 13032 ins_encode %{
12990 13033 Label p_one, m_one, done;
12991 13034 __ xorptr($dst$$Register, $dst$$Register);
12992 13035 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12993 13036 __ jccb(Assembler::less, m_one);
12994 13037 __ jccb(Assembler::greater, p_one);
12995 13038 __ cmpl($src1$$Register, $src2$$Register);
12996 13039 __ jccb(Assembler::below, m_one);
12997 13040 __ jccb(Assembler::equal, done);
12998 13041 __ bind(p_one);
12999 13042 __ incrementl($dst$$Register);
13000 13043 __ jmpb(done);
13001 13044 __ bind(m_one);
13002 13045 __ decrementl($dst$$Register);
13003 13046 __ bind(done);
13004 13047 %}
13005 13048 ins_pipe( pipe_slow );
13006 13049 %}
13007 13050
13008 13051 //======
13009 13052 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13010 13053 // compares. Can be used for LE or GT compares by reversing arguments.
13011 13054 // NOT GOOD FOR EQ/NE tests.
// Compare-to-zero: for LT/GE only the sign of the high word matters, so
// a TEST of src.hi against itself suffices.
13012 13055 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13013 13056 match( Set flags (CmpL src zero ));
13014 13057 ins_cost(100);
13015 13058 format %{ "TEST $src.hi,$src.hi" %}
13016 13059 opcode(0x85);
13017 13060 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13018 13061 ins_pipe( ialu_cr_reg_reg );
13019 13062 %}
13020 13063
13021 13064 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13022 13065 // compares. Can be used for LE or GT compares by reversing arguments.
13023 13066 // NOT GOOD FOR EQ/NE tests.
// CMP on the low words generates a borrow which SBB folds into the
// high-word difference (computed in tmp so src1 is preserved), leaving
// the sign flag of the full 64-bit subtraction in EFLAGS.
13024 13067 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13025 13068 match( Set flags (CmpL src1 src2 ));
13026 13069 effect( TEMP tmp );
13027 13070 ins_cost(300);
13028 13071 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13029 13072 "MOV $tmp,$src1.hi\n\t"
13030 13073 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13031 13074 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13032 13075 ins_pipe( ialu_cr_reg_reg );
13033 13076 %}
13034 13077
13035 13078 // Long compares reg < zero/req OR reg >= zero/req.
13036 13079 // Just a wrapper for a normal branch, plus the predicate test.
// Predicate restricts this expansion to BoolTest::lt / ge, matching the
// LTGE-only flag semantics produced above.
13037 13080 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13038 13081 match(If cmp flags);
13039 13082 effect(USE labl);
13040 13083 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13041 13084 expand %{
13042 13085 jmpCon(cmp,flags,labl); // JLT or JGE...
13043 13086 %}
13044 13087 %}
13045 13088
13046 13089 // Compare 2 longs and CMOVE longs.
// Two CMOVcc's move both halves of the 64-bit value; predicate requires
// CMOV hardware support and restricts the Bool test to lt/ge (the only
// tests the LTGE flags encode correctly).
13047 13090 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13048 13091 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13049 13092 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13050 13093 ins_cost(400);
13051 13094 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13052 13095 "CMOV$cmp $dst.hi,$src.hi" %}
13053 13096 opcode(0x0F,0x40);
13054 13097 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13055 13098 ins_pipe( pipe_cmov_reg_long );
13056 13099 %}
13057 13100
// Memory-source variant: CMOV's directly from the long's two memory words.
13058 13101 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13059 13102 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13060 13103 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13061 13104 ins_cost(500);
13062 13105 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13063 13106 "CMOV$cmp $dst.hi,$src.hi" %}
13064 13107 opcode(0x0F,0x40);
13065 13108 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13066 13109 ins_pipe( pipe_cmov_reg_long );
13067 13110 %}
13068 13111
13069 13112 // Compare 2 longs and CMOVE ints.
13070 13113 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13071 13114 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13072 13115 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13073 13116 ins_cost(200);
13074 13117 format %{ "CMOV$cmp $dst,$src" %}
13075 13118 opcode(0x0F,0x40);
13076 13119 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13077 13120 ins_pipe( pipe_cmov_reg );
13078 13121 %}
13079 13122
13080 13123 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13081 13124 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13082 13125 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13083 13126 ins_cost(250);
13084 13127 format %{ "CMOV$cmp $dst,$src" %}
13085 13128 opcode(0x0F,0x40);
13086 13129 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13087 13130 ins_pipe( pipe_cmov_mem );
13088 13131 %}
13089 13132
13090 13133 // Compare 2 longs and CMOVE ptrs.
13091 13134 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13092 13135 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13093 13136 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13094 13137 ins_cost(200);
13095 13138 format %{ "CMOV$cmp $dst,$src" %}
13096 13139 opcode(0x0F,0x40);
13097 13140 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13098 13141 ins_pipe( pipe_cmov_reg );
13099 13142 %}
13100 13143
13101 13144 // Compare 2 longs and CMOVE doubles
13102 13145 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
// CMoveD on long-compare LTGE flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms — '&&' binds tighter than '||', so the 'ge' arm previously matched
// regardless of UseSSE. Matches the explicit parens in cmovLL_reg_LTGE.
13103 13146 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13104 13147 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13105 13148 ins_cost(200);
13106 13149 expand %{
13107 13150 fcmovD_regS(cmp,flags,dst,src);
13108 13151 %}
13109 13152 %}
13110 13153
13111 13154 // Compare 2 longs and CMOVE doubles
13112 13155 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
// CMoveD on long-compare LTGE flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'); otherwise this rule and cmovDD_reg_LTGE
// both matched for 'ge' at any UseSSE level.
13113 13156 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13114 13157 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13115 13158 ins_cost(200);
13116 13159 expand %{
13117 13160 fcmovXD_regS(cmp,flags,dst,src);
13118 13161 %}
13119 13162 %}
13120 13163
13121 13164 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
// CMoveF on long-compare LTGE flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13122 13165 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13123 13166 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13124 13167 ins_cost(200);
13125 13168 expand %{
13126 13169 fcmovF_regS(cmp,flags,dst,src);
13127 13170 %}
13128 13171 %}
13129 13172
13130 13173 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
// CMoveF on long-compare LTGE flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13131 13174 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13132 13175 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13133 13176 ins_cost(200);
13134 13177 expand %{
13135 13178 fcmovX_regS(cmp,flags,dst,src);
13136 13179 %}
13137 13180 %}
13138 13181
13139 13182 //======
13140 13183 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13141 13184 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13142 13185 match( Set flags (CmpL src zero ));
13143 13186 effect(TEMP tmp);
13144 13187 ins_cost(200);
13145 13188 format %{ "MOV $tmp,$src.lo\n\t"
13146 13189 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13147 13190 ins_encode( long_cmp_flags0( src, tmp ) ); // lo|hi == 0 iff the whole long is zero; sets ZF accordingly
13148 13191 ins_pipe( ialu_reg_reg_long );
13149 13192 %}
13150 13193
13151 13194 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13152 13195 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13153 13196 match( Set flags (CmpL src1 src2 ));
13154 13197 ins_cost(200+300); // two compares plus a short branch in the worst case
13155 13198 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13156 13199 "JNE,s skip\n\t"
13157 13200 "CMP $src1.hi,$src2.hi\n\t"
13158 13201 "skip:\t" %}
13159 13202 ins_encode( long_cmp_flags1( src1, src2 ) ); // ZF valid for EQ/NE only: hi compare is skipped once the lo halves already differ
13160 13203 ins_pipe( ialu_cr_reg_reg );
13161 13204 %}
13162 13205
13163 13206 // Long compare reg == zero/reg OR reg != zero/reg
13164 13207 // Just a wrapper for a normal branch, plus the predicate test.
13165 13208 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13166 13209 match(If cmp flags);
13167 13210 effect(USE labl);
13168 13211 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); // EQNE flags are only meaningful for eq/ne tests
13169 13212 expand %{
13170 13213 jmpCon(cmp,flags,labl); // JEQ or JNE...
13171 13214 %}
13172 13215 %}
13173 13216
13174 13217 // Compare 2 longs and CMOVE longs.
13175 13218 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13176 13219 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13177 13220 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178 13221 ins_cost(400);
13179 13222 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13180 13223 "CMOV$cmp $dst.hi,$src.hi" %}
13181 13224 opcode(0x0F,0x40);
13182 13225 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13183 13226 ins_pipe( pipe_cmov_reg_long );
13184 13227 %}
13185 13228
13186 13229 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13187 13230 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13188 13231 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13189 13232 ins_cost(500);
13190 13233 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13191 13234 "CMOV$cmp $dst.hi,$src.hi" %}
13192 13235 opcode(0x0F,0x40);
13193 13236 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13194 13237 ins_pipe( pipe_cmov_reg_long );
13195 13238 %}
13196 13239
13197 13240 // Compare 2 longs and CMOVE ints.
13198 13241 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13199 13242 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13200 13243 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13201 13244 ins_cost(200);
13202 13245 format %{ "CMOV$cmp $dst,$src" %}
13203 13246 opcode(0x0F,0x40);
13204 13247 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13205 13248 ins_pipe( pipe_cmov_reg );
13206 13249 %}
13207 13250
13208 13251 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13209 13252 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13210 13253 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13211 13254 ins_cost(250);
13212 13255 format %{ "CMOV$cmp $dst,$src" %}
13213 13256 opcode(0x0F,0x40);
13214 13257 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13215 13258 ins_pipe( pipe_cmov_mem );
13216 13259 %}
13217 13260
13218 13261 // Compare 2 longs and CMOVE ptrs.
13219 13262 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13220 13263 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13221 13264 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13222 13265 ins_cost(200);
13223 13266 format %{ "CMOV$cmp $dst,$src" %}
13224 13267 opcode(0x0F,0x40);
13225 13268 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13226 13269 ins_pipe( pipe_cmov_reg );
13227 13270 %}
13228 13271
13229 13272 // Compare 2 longs and CMOVE doubles
13230 13273 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
// CMoveD on long-compare EQNE flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'), matching cmovLL_reg_EQNE.
13231 13274 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13232 13275 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13233 13276 ins_cost(200);
13234 13277 expand %{
13235 13278 fcmovD_regS(cmp,flags,dst,src);
13236 13279 %}
13237 13280 %}
13238 13281
13239 13282 // Compare 2 longs and CMOVE doubles
13240 13283 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
// CMoveD on long-compare EQNE flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13241 13284 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13242 13285 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13243 13286 ins_cost(200);
13244 13287 expand %{
13245 13288 fcmovXD_regS(cmp,flags,dst,src);
13246 13289 %}
13247 13290 %}
13248 13291
13249 13292 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
// CMoveF on long-compare EQNE flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13250 13293 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13251 13294 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13252 13295 ins_cost(200);
13253 13296 expand %{
13254 13297 fcmovF_regS(cmp,flags,dst,src);
13255 13298 %}
13256 13299 %}
13257 13300
13258 13301 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// CMoveF on long-compare EQNE flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13259 13302 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13260 13303 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13261 13304 ins_cost(200);
13262 13305 expand %{
13263 13306 fcmovX_regS(cmp,flags,dst,src);
13264 13307 %}
13265 13308 %}
13266 13309
13267 13310 //======
13268 13311 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13269 13312 // Same as cmpL_reg_flags_LEGT except must negate src
13270 13313 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13271 13314 match( Set flags (CmpL src zero ));
13272 13315 effect( TEMP tmp );
13273 13316 ins_cost(300);
13274 13317 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13275 13318 "CMP $tmp,$src.lo\n\t"
13276 13319 "SBB $tmp,$src.hi\n\t" %}
13277 13320 ins_encode( long_cmp_flags3(src, tmp) ); // computes the flags of 0 - src, i.e. the operand-swapped compare; consume with a commuted test
13278 13321 ins_pipe( ialu_reg_reg_long );
13279 13322 %}
13280 13323
13281 13324 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13282 13325 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13283 13326 // requires a commuted test to get the same result.
13284 13327 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13285 13328 match( Set flags (CmpL src1 src2 ));
13286 13329 effect( TEMP tmp ); // SBB clobbers its destination; high-word subtract uses a scratch reg
13287 13330 ins_cost(300);
13288 13331 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13289 13332 "MOV $tmp,$src2.hi\n\t"
13290 13333 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13291 13334 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); // note src2/src1 order: flags describe src2 - src1, hence the commuted test
13292 13335 ins_pipe( ialu_cr_reg_reg );
13293 13336 %}
13294 13337
13295 13338 // Long compares reg < zero/reg OR reg >= zero/reg.
13296 13339 // Just a wrapper for a normal branch, plus the predicate test.
13297 13340 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13298 13341 match(If cmp flags);
13299 13342 effect(USE labl);
13300 13343 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); // gt/le only; the flags hold the swapped compare, cmpOp_commute emits the commuted condition
13301 13344 ins_cost(300);
13302 13345 expand %{
13303 13346 jmpCon(cmp,flags,labl); // JGT or JLE...
13304 13347 %}
13305 13348 %}
13306 13349
13307 13350 // Compare 2 longs and CMOVE longs.
13308 13351 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13309 13352 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13310 13353 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13311 13354 ins_cost(400);
13312 13355 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13313 13356 "CMOV$cmp $dst.hi,$src.hi" %}
13314 13357 opcode(0x0F,0x40);
13315 13358 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13316 13359 ins_pipe( pipe_cmov_reg_long );
13317 13360 %}
13318 13361
13319 13362 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13320 13363 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13321 13364 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13322 13365 ins_cost(500);
13323 13366 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13324 13367 "CMOV$cmp $dst.hi,$src.hi+4" %}
13325 13368 opcode(0x0F,0x40);
13326 13369 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13327 13370 ins_pipe( pipe_cmov_reg_long );
13328 13371 %}
13329 13372
13330 13373 // Compare 2 longs and CMOVE ints.
13331 13374 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13332 13375 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13333 13376 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13334 13377 ins_cost(200);
13335 13378 format %{ "CMOV$cmp $dst,$src" %}
13336 13379 opcode(0x0F,0x40);
13337 13380 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13338 13381 ins_pipe( pipe_cmov_reg );
13339 13382 %}
13340 13383
13341 13384 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13342 13385 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13343 13386 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13344 13387 ins_cost(250);
13345 13388 format %{ "CMOV$cmp $dst,$src" %}
13346 13389 opcode(0x0F,0x40);
13347 13390 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13348 13391 ins_pipe( pipe_cmov_mem );
13349 13392 %}
13350 13393
13351 13394 // Compare 2 longs and CMOVE ptrs.
13352 13395 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13353 13396 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13354 13397 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13355 13398 ins_cost(200);
13356 13399 format %{ "CMOV$cmp $dst,$src" %}
13357 13400 opcode(0x0F,0x40);
13358 13401 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13359 13402 ins_pipe( pipe_cmov_reg );
13360 13403 %}
13361 13404
13362 13405 // Compare 2 longs and CMOVE doubles
13363 13406 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// CMoveD on long-compare LEGT flags, x87 form (UseSSE<=1), via fcmovD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||'), matching cmovLL_reg_LEGT.
13364 13407 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13365 13408 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13366 13409 ins_cost(200);
13367 13410 expand %{
13368 13411 fcmovD_regS(cmp,flags,dst,src);
13369 13412 %}
13370 13413 %}
13371 13414
13372 13415 // Compare 2 longs and CMOVE doubles
13373 13416 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// CMoveD on long-compare LEGT flags, SSE2 form (UseSSE>=2), via fcmovXD_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13374 13417 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13375 13418 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13376 13419 ins_cost(200);
13377 13420 expand %{
13378 13421 fcmovXD_regS(cmp,flags,dst,src);
13379 13422 %}
13380 13423 %}
13381 13424
13382 13425 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// CMoveF on long-compare LEGT flags, x87 form (UseSSE==0), via fcmovF_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13383 13426 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13384 13427 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13385 13428 ins_cost(200);
13386 13429 expand %{
13387 13430 fcmovF_regS(cmp,flags,dst,src);
13388 13431 %}
13389 13432 %}
13390 13433
13391 13434
13392 13435 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// CMoveF on long-compare LEGT flags, SSE form (UseSSE>=1), via fcmovX_regS.
// FIX: parenthesize the BoolTest disjunction so the UseSSE guard covers both
// arms ('&&' binds tighter than '||').
13393 13436 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13394 13437 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13395 13438 ins_cost(200);
13396 13439 expand %{
13397 13440 fcmovX_regS(cmp,flags,dst,src);
13398 13441 %}
↓ open down ↓ |
11600 lines elided |
↑ open up ↑ |
13399 13442 %}
13400 13443
13401 13444
13402 13445 // ============================================================================
13403 13446 // Procedure Call/Return Instructions
13404 13447 // Call Java Static Instruction
13405 13448 // Note: If this code changes, the corresponding ret_addr_offset() and
13406 13449 // compute_padding() functions will have to be adjusted.
13407 13450 instruct CallStaticJavaDirect(method meth) %{
13408 13451 match(CallStaticJava);
13452 + predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke()); // ordinary static calls only; MethodHandle invokes take the CallStaticJavaHandle rule
13409 13453 effect(USE meth);
13410 13454 
13411 13455 ins_cost(300);
13412 13456 format %{ "CALL,static " %}
13413 13457 opcode(0xE8); /* E8 cd */
13414 13458 ins_encode( pre_call_FPU,
13415 13459 Java_Static_Call( meth ),
13416 13460 call_epilog,
13417 13461 post_call_FPU );
13418 13462 ins_pipe( pipe_slow );
13419 13463 ins_pc_relative(1);
13420 13464 ins_alignment(4);
13421 13465 %}
13422 13466
13467 +// Call Java Static Instruction (method handle version)
13468 +// Note: If this code changes, the corresponding ret_addr_offset() and
13469 +// compute_padding() functions will have to be adjusted.
13470 +instruct CallStaticJavaHandle(method meth, eBPRegP ebp) %{
13471 + match(CallStaticJava);
13472 + predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); // counterpart of CallStaticJavaDirect's negated predicate
13473 + effect(USE meth);
13474 + // EBP is saved by all callees (for interpreter stack correction).
13475 + // We use it here for a similar purpose, in {preserve,restore}_SP.
13476 +
13477 + ins_cost(300);
13478 + format %{ "CALL,static/MethodHandle " %}
13479 + opcode(0xE8); /* E8 cd */
13480 + ins_encode( pre_call_FPU,
13481 + preserve_SP, // capture SP in EBP before the call (see comment above)
13482 + Java_Static_Call( meth ),
13483 + restore_SP, // recover SP from EBP — NOTE(review): presumably MH adapters may move SP across the call; confirm against the encoding definitions
13484 + call_epilog,
13485 + post_call_FPU );
13486 + ins_pipe( pipe_slow );
13487 + ins_pc_relative(1);
13488 + ins_alignment(4);
13489 +%}
13490 +
13423 13491 // Call Java Dynamic Instruction
13424 13492 // Note: If this code changes, the corresponding ret_addr_offset() and
13425 13493 // compute_padding() functions will have to be adjusted.
13426 13494 instruct CallDynamicJavaDirect(method meth) %{
13427 13495 match(CallDynamicJava);
13428 13496 effect(USE meth);
13429 13497
13430 13498 ins_cost(300);
13431 13499 format %{ "MOV EAX,(oop)-1\n\t"
13432 13500 "CALL,dynamic" %}
13433 13501 opcode(0xE8); /* E8 cd */
13434 13502 ins_encode( pre_call_FPU,
13435 13503 Java_Dynamic_Call( meth ),
13436 13504 call_epilog,
13437 13505 post_call_FPU );
13438 13506 ins_pipe( pipe_slow );
13439 13507 ins_pc_relative(1);
13440 13508 ins_alignment(4);
13441 13509 %}
13442 13510
13443 13511 // Call Runtime Instruction
13444 13512 instruct CallRuntimeDirect(method meth) %{
13445 13513 match(CallRuntime );
13446 13514 effect(USE meth);
13447 13515
13448 13516 ins_cost(300);
13449 13517 format %{ "CALL,runtime " %}
13450 13518 opcode(0xE8); /* E8 cd */
13451 13519 // Use FFREEs to clear entries in float stack
13452 13520 ins_encode( pre_call_FPU,
13453 13521 FFree_Float_Stack_All,
13454 13522 Java_To_Runtime( meth ),
13455 13523 post_call_FPU );
13456 13524 ins_pipe( pipe_slow );
13457 13525 ins_pc_relative(1);
13458 13526 %}
13459 13527
13460 13528 // Call runtime without safepoint
13461 13529 instruct CallLeafDirect(method meth) %{
13462 13530 match(CallLeaf);
13463 13531 effect(USE meth);
13464 13532
13465 13533 ins_cost(300);
13466 13534 format %{ "CALL_LEAF,runtime " %}
13467 13535 opcode(0xE8); /* E8 cd */
13468 13536 ins_encode( pre_call_FPU,
13469 13537 FFree_Float_Stack_All,
13470 13538 Java_To_Runtime( meth ),
13471 13539 Verify_FPU_For_Leaf, post_call_FPU );
13472 13540 ins_pipe( pipe_slow );
13473 13541 ins_pc_relative(1);
13474 13542 %}
13475 13543
13476 13544 instruct CallLeafNoFPDirect(method meth) %{
13477 13545 match(CallLeafNoFP);
13478 13546 effect(USE meth);
13479 13547
13480 13548 ins_cost(300);
13481 13549 format %{ "CALL_LEAF_NOFP,runtime " %}
13482 13550 opcode(0xE8); /* E8 cd */
13483 13551 ins_encode(Java_To_Runtime(meth));
13484 13552 ins_pipe( pipe_slow );
13485 13553 ins_pc_relative(1);
13486 13554 %}
13487 13555
13488 13556
13489 13557 // Return Instruction
13490 13558 // Remove the return address & jump to it.
13491 13559 instruct Ret() %{
13492 13560 match(Return);
13493 13561 format %{ "RET" %}
13494 13562 opcode(0xC3); // near return: pops the return address and jumps to it
13495 13563 ins_encode(OpcP);
13496 13564 ins_pipe( pipe_jmp );
13497 13565 %}
13498 13566
13499 13567 // Tail Call; Jump from runtime stub to Java code.
13500 13568 // Also known as an 'interprocedural jump'.
13501 13569 // Target of jump will eventually return to caller.
13502 13570 // TailJump below removes the return address.
13503 13571 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13504 13572 match(TailCall jump_target method_oop );
13505 13573 ins_cost(300);
13506 13574 format %{ "JMP $jump_target \t# EBX holds method oop" %}
13507 13575 opcode(0xFF, 0x4); /* Opcode FF /4 */
13508 13576 ins_encode( OpcP, RegOpc(jump_target) );
13509 13577 ins_pipe( pipe_jmp );
13510 13578 %}
13511 13579
13512 13580
13513 13581 // Tail Jump; remove the return address; jump to target.
13514 13582 // TailCall above leaves the return address around.
13515 13583 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13516 13584 match( TailJump jump_target ex_oop );
13517 13585 ins_cost(300);
13518 13586 format %{ "POP EDX\t# pop return address into dummy\n\t"
13519 13587 "JMP $jump_target " %}
13520 13588 opcode(0xFF, 0x4); /* Opcode FF /4 */
13521 13589 ins_encode( enc_pop_rdx,
13522 13590 OpcP, RegOpc(jump_target) );
13523 13591 ins_pipe( pipe_jmp );
13524 13592 %}
13525 13593
13526 13594 // Create exception oop: created by stack-crawling runtime code.
13527 13595 // Created exception is now available to this handler, and is setup
13528 13596 // just prior to jumping to this handler. No code emitted.
13529 13597 instruct CreateException( eAXRegP ex_oop )
13530 13598 %{
13531 13599 match(Set ex_oop (CreateEx));
13532 13600
13533 13601 size(0);
13534 13602 // use the following format syntax
13535 13603 format %{ "# exception oop is in EAX; no code emitted" %}
13536 13604 ins_encode();
13537 13605 ins_pipe( empty );
13538 13606 %}
13539 13607
13540 13608
13541 13609 // Rethrow exception:
13542 13610 // The exception oop will come in the first argument position.
13543 13611 // Then JUMP (not call) to the rethrow stub code.
13544 13612 instruct RethrowException()
13545 13613 %{
13546 13614 match(Rethrow);
13547 13615
13548 13616 // use the following format syntax
13549 13617 format %{ "JMP rethrow_stub" %}
13550 13618 ins_encode(enc_rethrow);
13551 13619 ins_pipe( pipe_jmp );
13552 13620 %}
13553 13621
13554 13622 // inlined locking and unlocking
13555 13623
13556 13624
13557 13625 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13558 13626 match( Set cr (FastLock object box) );
13559 13627 effect( TEMP tmp, TEMP scr ); // tmp is pinned to EAX by its operand class — NOTE(review): presumably needed by CMPXCHG inside Fast_Lock; scr is a plain scratch reg
13560 13628 ins_cost(300);
13561 13629 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13562 13630 ins_encode( Fast_Lock(object,box,tmp,scr) );
13563 13631 ins_pipe( pipe_slow );
13564 13632 ins_pc_relative(1);
13565 13633 %}
13566 13634
13567 13635 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13568 13636 match( Set cr (FastUnlock object box) );
13569 13637 effect( TEMP tmp ); // box is pinned to EAX by its operand class; tmp is scratch for Fast_Unlock
13570 13638 ins_cost(300);
13571 13639 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13572 13640 ins_encode( Fast_Unlock(object,box,tmp) );
13573 13641 ins_pipe( pipe_slow );
13574 13642 ins_pc_relative(1);
13575 13643 %}
13576 13644
13577 13645
13578 13646
13579 13647 // ============================================================================
13580 13648 // Safepoint Instruction
13581 13649 instruct safePoint_poll(eFlagsReg cr) %{
13582 13650 match(SafePoint);
13583 13651 effect(KILL cr);
13584 13652 
13585 13653 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13586 13654 // On SPARC that might be acceptable as we can generate the address with
13587 13655 // just a sethi, saving an or. By polling at offset 0 we can end up
13588 13656 // putting additional pressure on the index-0 in the D$. Because of
13589 13657 // alignment (just like the situation at hand) the lower indices tend
13590 13658 // to see more traffic. It'd be better to change the polling address
13591 13659 // to offset 0 of the last $line in the polling page.
13592 13660 
13593 13661 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13594 13662 ins_cost(125);
13595 13663 size(6) ; // NOTE(review): fixed 6-byte size — must stay in sync with what Safepoint_Poll() emits
13596 13664 ins_encode( Safepoint_Poll() );
13597 13665 ins_pipe( ialu_reg_mem );
13598 13666 %}
13599 13667
13600 13668 //----------PEEPHOLE RULES-----------------------------------------------------
13601 13669 // These must follow all instruction definitions as they use the names
13602 13670 // defined in the instructions definitions.
13603 13671 //
13604 13672 // peepmatch ( root_instr_name [preceding_instruction]* );
13605 13673 //
13606 13674 // peepconstraint %{
13607 13675 // (instruction_number.operand_name relational_op instruction_number.operand_name
13608 13676 // [, ...] );
13609 13677 // // instruction numbers are zero-based using left to right order in peepmatch
13610 13678 //
13611 13679 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13612 13680 // // provide an instruction_number.operand_name for each operand that appears
13613 13681 // // in the replacement instruction's match rule
13614 13682 //
13615 13683 // ---------VM FLAGS---------------------------------------------------------
13616 13684 //
13617 13685 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13618 13686 //
13619 13687 // Each peephole rule is given an identifying number starting with zero and
13620 13688 // increasing by one in the order seen by the parser. An individual peephole
13621 13689 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13622 13690 // on the command-line.
13623 13691 //
13624 13692 // ---------CURRENT LIMITATIONS----------------------------------------------
13625 13693 //
13626 13694 // Only match adjacent instructions in same basic block
13627 13695 // Only equality constraints
13628 13696 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13629 13697 // Only one replacement instruction
13630 13698 //
13631 13699 // ---------EXAMPLE----------------------------------------------------------
13632 13700 //
13633 13701 // // pertinent parts of existing instructions in architecture description
13634 13702 // instruct movI(eRegI dst, eRegI src) %{
13635 13703 // match(Set dst (CopyI src));
13636 13704 // %}
13637 13705 //
13638 13706 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13639 13707 // match(Set dst (AddI dst src));
13640 13708 // effect(KILL cr);
13641 13709 // %}
13642 13710 //
13643 13711 // // Change (inc mov) to lea
13644 13712 // peephole %{
13645 13713 // // increment preceeded by register-register move
13646 13714 // peepmatch ( incI_eReg movI );
13647 13715 // // require that the destination register of the increment
13648 13716 // // match the destination register of the move
13649 13717 // peepconstraint ( 0.dst == 1.dst );
13650 13718 // // construct a replacement instruction that sets
13651 13719 // // the destination to ( move's source register + one )
13652 13720 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13653 13721 // %}
13654 13722 //
13655 13723 // Implementation no longer uses movX instructions since
13656 13724 // machine-independent system no longer uses CopyX nodes.
13657 13725 //
13658 13726 // peephole %{
13659 13727 // peepmatch ( incI_eReg movI );
13660 13728 // peepconstraint ( 0.dst == 1.dst );
13661 13729 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13662 13730 // %}
13663 13731 //
13664 13732 // peephole %{
13665 13733 // peepmatch ( decI_eReg movI );
13666 13734 // peepconstraint ( 0.dst == 1.dst );
13667 13735 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13668 13736 // %}
13669 13737 //
13670 13738 // peephole %{
13671 13739 // peepmatch ( addI_eReg_imm movI );
13672 13740 // peepconstraint ( 0.dst == 1.dst );
13673 13741 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13674 13742 // %}
13675 13743 //
13676 13744 // peephole %{
13677 13745 // peepmatch ( addP_eReg_imm movP );
13678 13746 // peepconstraint ( 0.dst == 1.dst );
13679 13747 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13680 13748 // %}
13681 13749
13682 13750 // // Change load of spilled value to only a spill
13683 13751 // instruct storeI(memory mem, eRegI src) %{
13684 13752 // match(Set mem (StoreI mem src));
13685 13753 // %}
13686 13754 //
13687 13755 // instruct loadI(eRegI dst, memory mem) %{
13688 13756 // match(Set dst (LoadI mem));
13689 13757 // %}
13690 13758 //
13691 13759 peephole %{
13692 13760 peepmatch ( loadI storeI ); // a load immediately following a store of the same register to the same slot
13693 13761 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13694 13762 peepreplace ( storeI( 1.mem 1.mem 1.src ) ); // keep only the store; the loaded value is already in the register
13695 13763 %}
13696 13764
13697 13765 //----------SMARTSPILL RULES---------------------------------------------------
13698 13766 // These must follow all instruction definitions as they use the names
13699 13767 // defined in the instructions definitions.
↓ open down ↓ |
267 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX