Print this page
rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 // or visit www.oracle.com if you need additional information or have any
21 21 // questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // architecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok, so here's the trick: FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instruction out of Intel's FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source_hpp %{
239 239 // Must be visible to the DFA in dfa_x86_32.cpp
240 240 extern bool is_operand_hi32_zero(Node* n);
241 241 %}
242 242
243 243 source %{
244 244 #define RELOC_IMM32 Assembler::imm_operand
245 245 #define RELOC_DISP32 Assembler::disp32_operand
246 246
247 247 #define __ _masm.
248 248
249 249 // How to find the high register of a Long pair, given the low register
250 250 #define HIGH_FROM_LOW(x) ((x)+2)
251 251
252 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 254 // fast versions of NegF/NegD and AbsF/AbsD.
255 255
256 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
257 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 259 // of 128-bits operands for SSE instructions.
260 260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 261 // Store the value to a 128-bits operand.
262 262 operand[0] = lo;
263 263 operand[1] = hi;
264 264 return operand;
265 265 }
266 266
267 267 // Buffer for 128-bits masks used by SSE instructions.
268 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269 269
270 270 // Static initialization during VM startup.
271 271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275 275
276 276 // Offset hacking within calls.
277 277 static int pre_call_FPU_size() {
278 278 if (Compile::current()->in_24_bit_fp_mode())
279 279 return 6; // fldcw
280 280 return 0;
281 281 }
282 282
283 283 static int preserve_SP_size() {
284 284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 285 }
286 286
287 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 288 // from the start of the call to the point where the return address
289 289 // will point.
290 290 int MachCallStaticJavaNode::ret_addr_offset() {
291 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 292 if (_method_handle_invoke)
293 293 offset += preserve_SP_size();
294 294 return offset;
295 295 }
296 296
297 297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 299 }
300 300
301 301 static int sizeof_FFree_Float_Stack_All = -1;
302 302
303 303 int MachCallRuntimeNode::ret_addr_offset() {
304 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306 306 }
307 307
308 308 // Indicate if the safepoint node needs the polling page as an input.
309 309 // Since x86 does have absolute addressing, it doesn't.
310 310 bool SafePointNode::needs_polling_address_input() {
311 311 return false;
312 312 }
313 313
314 314 //
315 315 // Compute padding required for nodes which need alignment
316 316 //
317 317
318 318 // The address of the call instruction needs to be 4-byte aligned to
319 319 // ensure that it does not span a cache line so that it can be patched.
320 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 322 current_offset += 1; // skip call opcode byte
323 323 return round_to(current_offset, alignment_required()) - current_offset;
324 324 }
325 325
326 326 // The address of the call instruction needs to be 4-byte aligned to
327 327 // ensure that it does not span a cache line so that it can be patched.
328 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 331 current_offset += 1; // skip call opcode byte
332 332 return round_to(current_offset, alignment_required()) - current_offset;
333 333 }
334 334
335 335 // The address of the call instruction needs to be 4-byte aligned to
336 336 // ensure that it does not span a cache line so that it can be patched.
337 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 339 current_offset += 5; // skip MOV instruction
340 340 current_offset += 1; // skip call opcode byte
341 341 return round_to(current_offset, alignment_required()) - current_offset;
342 342 }
343 343
344 344 #ifndef PRODUCT
345 345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
346 346 st->print("INT3");
347 347 }
348 348 #endif
349 349
350 350 // EMIT_RM()
351 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
352 352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
353 353 cbuf.insts()->emit_int8(c);
354 354 }
355 355
356 356 // EMIT_CC()
357 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
358 358 unsigned char c = (unsigned char)( f1 | f2 );
359 359 cbuf.insts()->emit_int8(c);
360 360 }
361 361
362 362 // EMIT_OPCODE()
363 363 void emit_opcode(CodeBuffer &cbuf, int code) {
364 364 cbuf.insts()->emit_int8((unsigned char) code);
365 365 }
366 366
367 367 // EMIT_OPCODE() w/ relocation information
368 368 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
369 369 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
370 370 emit_opcode(cbuf, code);
371 371 }
372 372
373 373 // EMIT_D8()
374 374 void emit_d8(CodeBuffer &cbuf, int d8) {
375 375 cbuf.insts()->emit_int8((unsigned char) d8);
376 376 }
377 377
378 378 // EMIT_D16()
379 379 void emit_d16(CodeBuffer &cbuf, int d16) {
380 380 cbuf.insts()->emit_int16(d16);
381 381 }
382 382
383 383 // EMIT_D32()
384 384 void emit_d32(CodeBuffer &cbuf, int d32) {
385 385 cbuf.insts()->emit_int32(d32);
386 386 }
387 387
388 388 // emit 32 bit value and construct relocation entry from relocInfo::relocType
389 389 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
390 390 int format) {
391 391 cbuf.relocate(cbuf.insts_mark(), reloc, format);
392 392 cbuf.insts()->emit_int32(d32);
393 393 }
394 394
395 395 // emit 32 bit value and construct relocation entry from RelocationHolder
396 396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
397 397 int format) {
398 398 #ifdef ASSERT
399 399 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
400 400 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
401 401 }
402 402 #endif
403 403 cbuf.relocate(cbuf.insts_mark(), rspec, format);
404 404 cbuf.insts()->emit_int32(d32);
405 405 }
406 406
407 407 // Access stack slot for load or store
408 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
409 409 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
410 410 if( -128 <= disp && disp <= 127 ) {
411 411 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
412 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
413 413 emit_d8 (cbuf, disp); // Displacement // R/M byte
414 414 } else {
415 415 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
416 416 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
417 417 emit_d32(cbuf, disp); // Displacement // R/M byte
418 418 }
419 419 }
420 420
421 421 // eRegI ereg, memory mem) %{ // emit_reg_mem
422 422 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
423 423 // There is no index & no scale, use form without SIB byte
424 424 if ((index == 0x4) &&
425 425 (scale == 0) && (base != ESP_enc)) {
426 426 // If no displacement, mode is 0x0; unless base is [EBP]
427 427 if ( (displace == 0) && (base != EBP_enc) ) {
428 428 emit_rm(cbuf, 0x0, reg_encoding, base);
429 429 }
430 430 else { // If 8-bit displacement, mode 0x1
431 431 if ((displace >= -128) && (displace <= 127)
432 432 && !(displace_is_oop) ) {
433 433 emit_rm(cbuf, 0x1, reg_encoding, base);
434 434 emit_d8(cbuf, displace);
435 435 }
436 436 else { // If 32-bit displacement
437 437 if (base == -1) { // Special flag for absolute address
438 438 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
439 439 // (manual lies; no SIB needed here)
440 440 if ( displace_is_oop ) {
441 441 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
442 442 } else {
443 443 emit_d32 (cbuf, displace);
444 444 }
445 445 }
446 446 else { // Normal base + offset
447 447 emit_rm(cbuf, 0x2, reg_encoding, base);
448 448 if ( displace_is_oop ) {
449 449 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
450 450 } else {
451 451 emit_d32 (cbuf, displace);
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457 else { // Else, encode with the SIB byte
458 458 // If no displacement, mode is 0x0; unless base is [EBP]
459 459 if (displace == 0 && (base != EBP_enc)) { // If no displacement
460 460 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
461 461 emit_rm(cbuf, scale, index, base);
462 462 }
463 463 else { // If 8-bit displacement, mode 0x1
464 464 if ((displace >= -128) && (displace <= 127)
465 465 && !(displace_is_oop) ) {
466 466 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
467 467 emit_rm(cbuf, scale, index, base);
468 468 emit_d8(cbuf, displace);
469 469 }
470 470 else { // If 32-bit displacement
471 471 if (base == 0x04 ) {
472 472 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
473 473 emit_rm(cbuf, scale, index, 0x04);
474 474 } else {
475 475 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
476 476 emit_rm(cbuf, scale, index, base);
477 477 }
478 478 if ( displace_is_oop ) {
479 479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 480 } else {
481 481 emit_d32 (cbuf, displace);
482 482 }
483 483 }
484 484 }
485 485 }
486 486 }
487 487
488 488
489 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 490 if( dst_encoding == src_encoding ) {
491 491 // reg-reg copy, use an empty encoding
492 492 } else {
493 493 emit_opcode( cbuf, 0x8B );
494 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 495 }
496 496 }
497 497
498 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 499 if( dst_encoding == src_encoding ) {
↓ open down ↓ |
499 lines elided |
↑ open up ↑ |
500 500 // reg-reg copy, use an empty encoding
501 501 } else {
502 502 MacroAssembler _masm(&cbuf);
503 503
504 504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 505 }
506 506 }
507 507
508 508
509 509 //=============================================================================
510 +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511 +
512 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
513 + emit_constant_table(cbuf);
514 + set_table_base_offset(0);
515 + // Empty encoding
516 +}
517 +
518 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
519 + // Compute the size (even if it's zero) since
520 + // Compile::Shorten_branches needs the table to be emitted (which
521 + // happens in Compile::scratch_emit_size) to calculate the size for
522 + // MachConstantNode's.
523 + return MachNode::size(ra_);
524 +}
525 +
526 +#ifndef PRODUCT
527 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
528 + st->print("# MachConstantBaseNode (empty encoding)");
529 +}
530 +#endif
531 +
532 +
533 +//=============================================================================
510 534 #ifndef PRODUCT
511 535 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
512 536 Compile* C = ra_->C;
513 537 if( C->in_24_bit_fp_mode() ) {
514 538 st->print("FLDCW 24 bit fpu control word");
515 539 st->print_cr(""); st->print("\t");
516 540 }
517 541
518 542 int framesize = C->frame_slots() << LogBytesPerInt;
519 543 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
520 544 // Remove two words for return addr and rbp,
521 545 framesize -= 2*wordSize;
522 546
523 547 // Calls to C2R adapters often do not accept exceptional returns.
524 548 // We require that their callers must bang for them. But be careful, because
525 549 // some VM calls (such as call site linkage) can use several kilobytes of
526 550 // stack. But the stack safety zone should account for that.
527 551 // See bugs 4446381, 4468289, 4497237.
528 552 if (C->need_stack_bang(framesize)) {
529 553 st->print_cr("# stack bang"); st->print("\t");
530 554 }
531 555 st->print_cr("PUSHL EBP"); st->print("\t");
532 556
533 557 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
534 558 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
535 559 st->print_cr(""); st->print("\t");
536 560 framesize -= wordSize;
537 561 }
538 562
539 563 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
540 564 if (framesize) {
541 565 st->print("SUB ESP,%d\t# Create frame",framesize);
542 566 }
543 567 } else {
544 568 st->print("SUB ESP,%d\t# Create frame",framesize);
545 569 }
546 570 }
547 571 #endif
548 572
549 573
550 574 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
551 575 Compile* C = ra_->C;
552 576
553 577 if (UseSSE >= 2 && VerifyFPU) {
554 578 MacroAssembler masm(&cbuf);
555 579 masm.verify_FPU(0, "FPU stack must be clean on entry");
556 580 }
557 581
558 582 // WARNING: Initial instruction MUST be 5 bytes or longer so that
559 583 // NativeJump::patch_verified_entry will be able to patch out the entry
560 584 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
561 585 // depth is ok at 5 bytes, the frame allocation can be either 3 or
562 586 // 6 bytes. So if we don't do the fldcw or the push then we must
563 587 // use the 6 byte frame allocation even if we have no frame. :-(
564 588 // If method sets FPU control word do it now
565 589 if( C->in_24_bit_fp_mode() ) {
566 590 MacroAssembler masm(&cbuf);
567 591 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
568 592 }
569 593
570 594 int framesize = C->frame_slots() << LogBytesPerInt;
571 595 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
572 596 // Remove two words for return addr and rbp,
573 597 framesize -= 2*wordSize;
574 598
575 599 // Calls to C2R adapters often do not accept exceptional returns.
576 600 // We require that their callers must bang for them. But be careful, because
577 601 // some VM calls (such as call site linkage) can use several kilobytes of
578 602 // stack. But the stack safety zone should account for that.
579 603 // See bugs 4446381, 4468289, 4497237.
580 604 if (C->need_stack_bang(framesize)) {
581 605 MacroAssembler masm(&cbuf);
582 606 masm.generate_stack_overflow_check(framesize);
583 607 }
584 608
585 609 // We always push rbp, so that on return to interpreter rbp, will be
586 610 // restored correctly and we can correct the stack.
587 611 emit_opcode(cbuf, 0x50 | EBP_enc);
588 612
589 613 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
590 614 emit_opcode(cbuf, 0x68); // push 0xbadb100d
591 615 emit_d32(cbuf, 0xbadb100d);
592 616 framesize -= wordSize;
593 617 }
594 618
595 619 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
596 620 if (framesize) {
597 621 emit_opcode(cbuf, 0x83); // sub SP,#framesize
598 622 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
599 623 emit_d8(cbuf, framesize);
600 624 }
601 625 } else {
602 626 emit_opcode(cbuf, 0x81); // sub SP,#framesize
603 627 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
604 628 emit_d32(cbuf, framesize);
605 629 }
606 630 C->set_frame_complete(cbuf.insts_size());
607 631
608 632 #ifdef ASSERT
609 633 if (VerifyStackAtCalls) {
610 634 Label L;
611 635 MacroAssembler masm(&cbuf);
612 636 masm.push(rax);
613 637 masm.mov(rax, rsp);
614 638 masm.andptr(rax, StackAlignmentInBytes-1);
615 639 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
616 640 masm.pop(rax);
617 641 masm.jcc(Assembler::equal, L);
618 642 masm.stop("Stack is not properly aligned!");
619 643 masm.bind(L);
620 644 }
621 645 #endif
622 646
623 647 }
624 648
625 649 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
626 650 return MachNode::size(ra_); // too many variables; just compute it the hard way
627 651 }
628 652
629 653 int MachPrologNode::reloc() const {
630 654 return 0; // a large enough number
631 655 }
632 656
633 657 //=============================================================================
634 658 #ifndef PRODUCT
635 659 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
636 660 Compile *C = ra_->C;
637 661 int framesize = C->frame_slots() << LogBytesPerInt;
638 662 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
639 663 // Remove two words for return addr and rbp,
640 664 framesize -= 2*wordSize;
641 665
642 666 if( C->in_24_bit_fp_mode() ) {
643 667 st->print("FLDCW standard control word");
644 668 st->cr(); st->print("\t");
645 669 }
646 670 if( framesize ) {
647 671 st->print("ADD ESP,%d\t# Destroy frame",framesize);
648 672 st->cr(); st->print("\t");
649 673 }
650 674 st->print_cr("POPL EBP"); st->print("\t");
651 675 if( do_polling() && C->is_method_compilation() ) {
652 676 st->print("TEST PollPage,EAX\t! Poll Safepoint");
653 677 st->cr(); st->print("\t");
654 678 }
655 679 }
656 680 #endif
657 681
// Emit the method epilog: restore the FPU control word if the method ran in
// 24-bit precision mode, tear down the frame, pop saved EBP, and (for normal
// method compilations) emit the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize  (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize  (shorter imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl ebp

  if( do_polling() && C->is_method_compilation() ) {
    // Safepoint poll: TEST EAX,[polling page]; the relocation lets the VM
    // find and arm/disarm this poll at the return site.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}
694 718
695 719 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
696 720 Compile *C = ra_->C;
697 721 // If method set FPU control word, restore to standard control word
698 722 int size = C->in_24_bit_fp_mode() ? 6 : 0;
699 723 if( do_polling() && C->is_method_compilation() ) size += 6;
700 724
701 725 int framesize = C->frame_slots() << LogBytesPerInt;
702 726 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
703 727 // Remove two words for return addr and rbp,
704 728 framesize -= 2*wordSize;
705 729
706 730 size++; // popl rbp,
707 731
708 732 if( framesize >= 128 ) {
709 733 size += 6;
710 734 } else {
711 735 size += framesize ? 3 : 0;
712 736 }
713 737 return size;
714 738 }
715 739
// Upper bound on relocation entries this node contributes.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Offset of the safepoint within this node; 0 here — the poll site is
// identified by the relocation emitted in emit(), not by this offset.
int MachEpilogNode::safepoint_offset() const { return 0; }
725 749
726 750 //=============================================================================
727 751
// Register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Classify an allocator register name into one of the RC buckets.
// The validity check must precede the stack check.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;        // general-purpose register
  if (r->is_FloatRegister()) {                // x87 stack register
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
743 767
// Emit (cbuf != NULL), print (cbuf == NULL, !do_size), or just size a single
// reg <-> [ESP+offset] instruction.  Returns the accumulated size so calls
// can be chained:  size = impl_helper(..., size, st);
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + modrm + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
763 787
764 788 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emits/prints/sizes an XMM <-> [ESP+offset] move.  A double move is detected
// by reg_lo+1 == reg_hi; single moves use MOVSS, double moves MOVSD or
// MOVLPD depending on UseXmmLoadAndClearUpper.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );   // movss prefix
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0x0F + opcode + modrm + SIB, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
802 826
803 827
// Emit/print/size an XMM-to-XMM register move.  With UseXmmRegToRegMoveAll
// the MOVAPS/MOVAPD forms are used, otherwise MOVSS/MOVSD.  Double moves are
// detected by adjacent hi/lo register pairs.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 );   // MOVAPD prefix for double
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size+4;
  }
}
844 868
// Move a 32-bit general-purpose register into an XMM register (MOVD,
// 66 0F 6E /r).
// NOTE(review): unlike the other impl_* helpers this returns a fixed 4 rather
// than size+4; the single caller reaches it with size == 0, but verify before
// reusing it elsewhere.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
860 884
861 885
// Move an XMM register into a 32-bit general-purpose register (MOVD,
// 66 0F 7E /r).  Mirror of impl_movgpr2x_helper above, including the fixed
// return of 4 — see the note there.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
877 901
// Emit/print/size a 32-bit GPR-to-GPR move (MOV r32,r/m32: 8B /r).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + modrm
}
890 914
// Store an x87 register to [ESP+offset].  If the source is not already at
// the top of the FP stack it is first duplicated there with FLD and stored
// with the popping form (FSTP); otherwise the non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" argument to impl_helper is abused here to select the
  // store opcode's /digit field: EBX_num -> store & pop, EDX_num -> store.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
920 944
// Central spill-copy dispatcher: emits (cbuf != NULL), formats (cbuf == NULL,
// !do_size), or sizes (do_size) the move between the register-allocator
// assignments of the input and this node.  The first word is moved by the
// cascade of class checks below, then the second word (for longs/doubles) is
// handled at the bottom.  The order of the checks is significant.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so it is not clobbered by the low-word copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value through a temporary stack slot below ESP...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  // NOTE(review): control falls off here without a return; Unimplemented()
  // is presumed fatal/non-returning — confirm against utilities/debug.hpp.
  Unimplemented();
}
1127 1151
#ifndef PRODUCT
// Format, emit and size all delegate to the single implementation() above,
// distinguished by the (cbuf, do_size, st) argument combination.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1141 1165
1142 1166 //=============================================================================
#ifndef PRODUCT
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit _count one-byte (or fused multi-byte) nops for alignment padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// Size is exactly the pad count in bytes.
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1157 1181
1158 1182
1159 1183 //=============================================================================
#ifndef PRODUCT
// Pretty-print the lock-box address materialization: LEA reg,[ESP+offset].
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1167 1191
1168 1192 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1169 1193 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1170 1194 int reg = ra_->get_encode(this);
1171 1195 if( offset >= 128 ) {
1172 1196 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1173 1197 emit_rm(cbuf, 0x2, reg, 0x04);
1174 1198 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1175 1199 emit_d32(cbuf, offset);
1176 1200 }
1177 1201 else {
1178 1202 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1179 1203 emit_rm(cbuf, 0x1, reg, 0x04);
1180 1204 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1181 1205 emit_d8(cbuf, offset);
1182 1206 }
1183 1207 }
1184 1208
1185 1209 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1186 1210 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1187 1211 if( offset >= 128 ) {
1188 1212 return 7;
1189 1213 }
1190 1214 else {
1191 1215 return 4;
1192 1216 }
1193 1217 }
1194 1218
1195 1219 //=============================================================================
1196 1220
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark(); // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpretor
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpretor
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1231 1255
1232 1256 //=============================================================================
#ifndef PRODUCT
// Pretty-print the unverified entry point: inline-cache klass check plus the
// alignment NOPs that MachUEPNode::emit produces.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1243 1267
// Emit the unverified entry point: compare the expected klass (in EAX) against
// the receiver's klass (via ECX) and jump to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size of the UEP sequence; one byte smaller when OptoBreakpoint
// reserves the slot for an int3 instead of a NOP.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
1265 1289
1266 1290
1267 1291 //=============================================================================
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1276 1300
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.
// Returns the offset of the handler within the code buffer, or 0 when the
// stub section could not be expanded.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1293 1317
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // The deopt handler starts out as a jump and can be patched to
  // a call by deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  // 5 extra bytes beyond the exception handler: the pushl of the return pc.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1302 1326
1303 1327 // Emit deopt handler code.
1304 1328 int emit_deopt_handler(CodeBuffer& cbuf) {
1305 1329
1306 1330 // Note that the code buffer's insts_mark is always relative to insts.
1307 1331 // That's why we must use the macroassembler to generate a handler.
1308 1332 MacroAssembler _masm(&cbuf);
1309 1333 address base =
1310 1334 __ start_a_stub(size_exception_handler());
1311 1335 if (base == NULL) return 0; // CodeBuffer::expand failed
1312 1336 int offset = __ offset();
↓ open down ↓ |
793 lines elided |
↑ open up ↑ |
1313 1337 InternalAddress here(__ pc());
1314 1338 __ pushptr(here.addr());
1315 1339
1316 1340 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1317 1341 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1318 1342 __ end_a_stub();
1319 1343 return offset;
1320 1344 }
1321 1345
1322 1346
1323 -static void emit_double_constant(CodeBuffer& cbuf, double x) {
1324 - int mark = cbuf.insts()->mark_off();
1325 - MacroAssembler _masm(&cbuf);
1326 - address double_address = __ double_constant(x);
1327 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1328 - emit_d32_reloc(cbuf,
1329 - (int)double_address,
1330 - internal_word_Relocation::spec(double_address),
1331 - RELOC_DISP32);
1332 -}
1333 -
1334 -static void emit_float_constant(CodeBuffer& cbuf, float x) {
1335 - int mark = cbuf.insts()->mark_off();
1336 - MacroAssembler _masm(&cbuf);
1337 - address float_address = __ float_constant(x);
1338 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1339 - emit_d32_reloc(cbuf,
1340 - (int)float_address,
1341 - internal_word_Relocation::spec(float_address),
1342 - RELOC_DISP32);
1343 -}
1344 -
1345 -
1346 1347 const bool Matcher::match_rule_supported(int opcode) {
1347 1348 if (!has_match_rule(opcode))
1348 1349 return false;
1349 1350
1350 1351 return true; // Per default match rules are supported.
1351 1352 }
1352 1353
// Convert a global register number to an FPU-stack-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1356 1357
1357 -bool is_positive_zero_float(jfloat f) {
1358 - return jint_cast(f) == jint_cast(0.0F);
1359 -}
1360 -
1361 -bool is_positive_one_float(jfloat f) {
1362 - return jint_cast(f) == jint_cast(1.0F);
1363 -}
1364 -
1365 -bool is_positive_zero_double(jdouble d) {
1366 - return jlong_cast(d) == jlong_cast(0.0);
1367 -}
1368 -
1369 -bool is_positive_one_double(jdouble d) {
1370 - return jlong_cast(d) == jlong_cast(1.0);
1371 -}
1372 -
// Query inherited from the SPARC port: "supported" just means this platform
// has a fast long-to-float conversion, which x86 does.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1377 1362
// Vector width in bytes: 8 (one MMX/XMM double-word pair) when SSE2 is
// available, otherwise vectorization is disabled.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}

// Vector ideal reg: vectors live in double registers on this port.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1387 1372
1388 1373 // Is this branch offset short enough that a short branch can be used?
1389 1374 //
1390 1375 // NOTE: If the platform does not provide any short branch variants, then
1391 1376 // this method should return false for offset 0.
1392 1377 bool Matcher::is_short_branch_offset(int rule, int offset) {
1393 1378 // the short version of jmpConUCF2 contains multiple branches,
1394 1379 // making the reach slightly less
1395 1380 if (rule == jmpConUCF2_rule)
1396 1381 return (-126 <= offset && offset <= 125);
1397 1382 return (-128 <= offset && offset <= 127);
1398 1383 }
1399 1384
// No 64-bit constant is "simple" on 32-bit x86.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1404 1389
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray (in bytes); below this a short inline
// sequence is used instead of rep stos.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;
1415 1400
// Narrow oops only exist on 64-bit VMs; this must never be called on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1420 1405
1421 1406
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1433 1418
1434 1419
// Rewrite the memory operand of a machine node that is used for an implicit
// null check, replacing it with a "win95-safe" variant where needed.  On
// Win95/98/ME a fault at a small positive address (field access off a null
// receiver through EBP-based addressing) would not be recognized — hence
// assumed rationale; verify against the AD file's win95_safe operand
// definitions.  `idx` is the input edge that holds the faulting address.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operand table until we reach the operand that owns edge `idx`.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1484 1469
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats
// (i.e. UseSSE == 0, so floats live on the x87 stack).
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1495 1480
1496 1481 // Return whether or not this register is ever used as an argument. This
1497 1482 // function is used on startup to build the trampoline stubs in generateOptoStub.
1498 1483 // Registers not mentioned will be killed by the VM call in the trampoline, and
1499 1484 // arguments in those registers not be available to the callee.
1500 1485 bool Matcher::can_be_java_arg( int reg ) {
1501 1486 if( reg == ECX_num || reg == EDX_num ) return true;
1502 1487 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1503 1488 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1504 1489 return false;
1505 1490 }
1506 1491
// A register is a spillable argument exactly when it can carry a Java
// argument (see can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1510 1495
1511 1496 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512 1497 // Use hardware integer DIV instruction when
1513 1498 // it is faster than a code which use multiply.
1514 1499 // Only when constant divisor fits into 32 bit
1515 1500 // (min_jint is excluded to get only correct
1516 1501 // positive 32 bit values from negative).
1517 1502 return VM_Version::has_fast_idiv() &&
1518 1503 (divisor == (int)divisor && divisor != min_jint);
1519 1504 }
1520 1505
// Register for DIVI projection of divmodI: x86 idiv leaves the
// quotient in EAX.
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;
}

// Register for MODI projection of divmodI: x86 idiv leaves the
// remainder in EDX.
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;
}

// Register for DIVL projection of divmodL.
// Not used on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
// Not used on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// EBP holds the saved SP across a method-handle invoke (see the
// preserve_SP/restore_SP encodings, which move rsp <-> rbp_mh_SP_save).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1546 1531
1547 1532 // Returns true if the high 32 bits of the value is known to be zero.
1548 1533 bool is_operand_hi32_zero(Node* n) {
1549 1534 int opc = n->Opcode();
1550 1535 if (opc == Op_LoadUI2L) {
1551 1536 return true;
1552 1537 }
1553 1538 if (opc == Op_AndL) {
1554 1539 Node* o2 = n->in(2);
1555 1540 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1556 1541 return true;
1557 1542 }
1558 1543 }
1559 1544 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1560 1545 return true;
1561 1546 }
1562 1547 return false;
1563 1548 }
1564 1549
1565 1550 %}
1566 1551
1567 1552 //----------ENCODING BLOCK-----------------------------------------------------
1568 1553 // This block specifies the encoding classes used by the compiler to output
1569 1554 // byte streams. Encoding classes generate functions which are called by
1570 1555 // Machine Instruction Nodes in order to generate the bit encoding of the
1571 1556 // instruction. Operands specify their base encoding interface with the
1572 1557 // interface keyword. There are currently supported four interfaces,
1573 1558 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1574 1559 // operand to generate a function which returns its register number when
1575 1560 // queried. CONST_INTER causes an operand to generate a function which
1576 1561 // returns the value of the constant when queried. MEMORY_INTER causes an
1577 1562 // operand to generate four functions which return the Base Register, the
1578 1563 // Index Register, the Scale Value, and the Offset Value of the operand when
1579 1564 // queried. COND_INTER causes an operand to generate six functions which
1580 1565 // return the encoding code (ie - encoding bits for the instruction)
1581 1566 // associated with each basic boolean condition for a conditional instruction.
1582 1567 // Instructions specify two basic values for encoding. They use the
1583 1568 // ins_encode keyword to specify their encoding class (which must be one of
1584 1569 // the class names specified in the encoding block), and they use the
1585 1570 // opcode keyword to specify, in order, their primary, secondary, and
1586 1571 // tertiary opcode. Only the opcode sections which a particular instruction
1587 1572 // needs for encoding need to be specified.
1588 1573 encode %{
1589 1574 // Build emit functions for each basic byte or larger field in the intel
1590 1575 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1591 1576 // code in the enc_class source block. Emit functions will live in the
1592 1577 // main source block for now. In future, we can generalize this by
1593 1578 // adding a syntax that specifies the sizes of fields in an order,
1594 1579 // so that the adlc can build the emit functions automagically
1595 1580
1596 1581 // Emit primary opcode
1597 1582 enc_class OpcP %{
1598 1583 emit_opcode(cbuf, $primary);
1599 1584 %}
1600 1585
1601 1586 // Emit secondary opcode
1602 1587 enc_class OpcS %{
1603 1588 emit_opcode(cbuf, $secondary);
1604 1589 %}
1605 1590
1606 1591 // Emit opcode directly
1607 1592 enc_class Opcode(immI d8) %{
1608 1593 emit_opcode(cbuf, $d8$$constant);
1609 1594 %}
1610 1595
1611 1596 enc_class SizePrefix %{
1612 1597 emit_opcode(cbuf,0x66);
1613 1598 %}
1614 1599
1615 1600 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
1616 1601 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1617 1602 %}
1618 1603
1619 1604 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
1620 1605 emit_opcode(cbuf,$opcode$$constant);
1621 1606 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1622 1607 %}
1623 1608
1624 1609 enc_class mov_r32_imm0( eRegI dst ) %{
1625 1610 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1626 1611 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
1627 1612 %}
1628 1613
1629 1614 enc_class cdq_enc %{
1630 1615 // Full implementation of Java idiv and irem; checks for
1631 1616 // special case as described in JVM spec., p.243 & p.271.
1632 1617 //
1633 1618 // normal case special case
1634 1619 //
1635 1620 // input : rax,: dividend min_int
1636 1621 // reg: divisor -1
1637 1622 //
1638 1623 // output: rax,: quotient (= rax, idiv reg) min_int
1639 1624 // rdx: remainder (= rax, irem reg) 0
1640 1625 //
1641 1626     // Code sequence:
1642 1627 //
1643 1628 // 81 F8 00 00 00 80 cmp rax,80000000h
1644 1629 // 0F 85 0B 00 00 00 jne normal_case
1645 1630 // 33 D2 xor rdx,edx
1646 1631 // 83 F9 FF cmp rcx,0FFh
1647 1632 // 0F 84 03 00 00 00 je done
1648 1633 // normal_case:
1649 1634 // 99 cdq
1650 1635 // F7 F9 idiv rax,ecx
1651 1636 // done:
1652 1637 //
1653 1638 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1654 1639 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1655 1640 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
1656 1641 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1657 1642 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1658 1643 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
1659 1644 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
1660 1645 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1661 1646 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1662 1647 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1663 1648 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
1664 1649 // normal_case:
1665 1650 emit_opcode(cbuf,0x99); // cdq
1666 1651 // idiv (note: must be emitted by the user of this rule)
1667 1652 // normal:
1668 1653 %}
1669 1654
1670 1655 // Dense encoding for older common ops
1671 1656 enc_class Opc_plus(immI opcode, eRegI reg) %{
1672 1657 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1673 1658 %}
1674 1659
1675 1660
1676 1661   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1677 1662 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1678 1663 // Check for 8-bit immediate, and set sign extend bit in opcode
1679 1664 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1680 1665 emit_opcode(cbuf, $primary | 0x02);
1681 1666 }
1682 1667 else { // If 32-bit immediate
1683 1668 emit_opcode(cbuf, $primary);
1684 1669 }
1685 1670 %}
1686 1671
1687 1672 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
1688 1673 // Emit primary opcode and set sign-extend bit
1689 1674 // Check for 8-bit immediate, and set sign extend bit in opcode
1690 1675 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1691 1676 emit_opcode(cbuf, $primary | 0x02); }
1692 1677 else { // If 32-bit immediate
1693 1678 emit_opcode(cbuf, $primary);
1694 1679 }
1695 1680 // Emit r/m byte with secondary opcode, after primary opcode.
1696 1681 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1697 1682 %}
1698 1683
1699 1684 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1700 1685 // Check for 8-bit immediate, and set sign extend bit in opcode
1701 1686 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1702 1687 $$$emit8$imm$$constant;
1703 1688 }
1704 1689 else { // If 32-bit immediate
1705 1690 // Output immediate
1706 1691 $$$emit32$imm$$constant;
1707 1692 }
1708 1693 %}
1709 1694
1710 1695 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1711 1696 // Emit primary opcode and set sign-extend bit
1712 1697 // Check for 8-bit immediate, and set sign extend bit in opcode
1713 1698 int con = (int)$imm$$constant; // Throw away top bits
1714 1699 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1715 1700 // Emit r/m byte with secondary opcode, after primary opcode.
1716 1701 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1717 1702 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1718 1703 else emit_d32(cbuf,con);
1719 1704 %}
1720 1705
1721 1706 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1722 1707 // Emit primary opcode and set sign-extend bit
1723 1708 // Check for 8-bit immediate, and set sign extend bit in opcode
1724 1709 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1725 1710 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1726 1711 // Emit r/m byte with tertiary opcode, after primary opcode.
1727 1712 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1728 1713 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1729 1714 else emit_d32(cbuf,con);
1730 1715 %}
1731 1716
1732 1717 enc_class Lbl (label labl) %{ // JMP, CALL
1733 1718 Label *l = $labl$$label;
1734 1719 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1735 1720 %}
1736 1721
1737 1722 enc_class LblShort (label labl) %{ // JMP, CALL
1738 1723 Label *l = $labl$$label;
1739 1724 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1740 1725 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1741 1726 emit_d8(cbuf, disp);
1742 1727 %}
1743 1728
1744 1729 enc_class OpcSReg (eRegI dst) %{ // BSWAP
1745 1730 emit_cc(cbuf, $secondary, $dst$$reg );
1746 1731 %}
1747 1732
1748 1733 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1749 1734 int destlo = $dst$$reg;
1750 1735 int desthi = HIGH_FROM_LOW(destlo);
1751 1736 // bswap lo
1752 1737 emit_opcode(cbuf, 0x0F);
1753 1738 emit_cc(cbuf, 0xC8, destlo);
1754 1739 // bswap hi
1755 1740 emit_opcode(cbuf, 0x0F);
1756 1741 emit_cc(cbuf, 0xC8, desthi);
1757 1742 // xchg lo and hi
1758 1743 emit_opcode(cbuf, 0x87);
1759 1744 emit_rm(cbuf, 0x3, destlo, desthi);
1760 1745 %}
1761 1746
1762 1747 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1763 1748 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1764 1749 %}
1765 1750
1766 1751 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
1767 1752 Label *l = $labl$$label;
1768 1753 $$$emit8$primary;
1769 1754 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1770 1755 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1771 1756 %}
1772 1757
1773 1758 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
1774 1759 Label *l = $labl$$label;
1775 1760 emit_cc(cbuf, $primary, $cop$$cmpcode);
1776 1761 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1777 1762 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1778 1763 emit_d8(cbuf, disp);
1779 1764 %}
1780 1765
1781 1766 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782 1767 $$$emit8$primary;
1783 1768 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784 1769 %}
1785 1770
1786 1771 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1787 1772 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788 1773 emit_d8(cbuf, op >> 8 );
1789 1774 emit_d8(cbuf, op & 255);
1790 1775 %}
1791 1776
1792 1777 // emulate a CMOV with a conditional branch around a MOV
1793 1778 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794 1779 // Invert sense of branch from sense of CMOV
1795 1780 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796 1781 emit_d8( cbuf, $brOffs$$constant );
1797 1782 %}
1798 1783
  // Slow-path subtype check.  Inputs: ESI = subklass, EAX = superklass;
  // ECX is clobbered; EDI receives the result.  With $primary set, the
  // hit path zeroes EDI so only the condition codes carry the answer;
  // the miss path binds past the xor -- NOTE(review): the exact result
  // convention is defined by check_klass_subtype_slow_path; confirm there.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1815 1800
1816 1801 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1817 1802 MacroAssembler masm(&cbuf);
1818 1803 int start = masm.offset();
1819 1804 if (UseSSE >= 2) {
1820 1805 if (VerifyFPU) {
1821 1806 masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822 1807 }
1823 1808 } else {
1824 1809 // External c_calling_convention expects the FPU stack to be 'clean'.
1825 1810 // Compiled code leaves it dirty. Do cleanup now.
1826 1811 masm.empty_FPU_stack();
1827 1812 }
1828 1813 if (sizeof_FFree_Float_Stack_All == -1) {
1829 1814 sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830 1815 } else {
1831 1816 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832 1817 }
1833 1818 %}
1834 1819
1835 1820 enc_class Verify_FPU_For_Leaf %{
1836 1821 if( VerifyFPU ) {
1837 1822 MacroAssembler masm(&cbuf);
1838 1823 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839 1824 }
1840 1825 %}
1841 1826
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // C calls return floats on the x87 stack; spill st(0) through a
        // 4-byte scratch slot below ESP and reload into xmm0.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Same as above with an 8-byte slot for a double.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}
1873 1858
1874 1859
1875 1860 enc_class pre_call_FPU %{
1876 1861 // If method sets FPU control word restore it here
1877 1862 debug_only(int off0 = cbuf.insts_size());
1878 1863 if( Compile::current()->in_24_bit_fp_mode() ) {
1879 1864 MacroAssembler masm(&cbuf);
1880 1865 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881 1866 }
1882 1867 debug_only(int off1 = cbuf.insts_size());
1883 1868 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
1884 1869 %}
1885 1870
1886 1871 enc_class post_call_FPU %{
1887 1872 // If method sets FPU control word do it here also
1888 1873 if( Compile::current()->in_24_bit_fp_mode() ) {
1889 1874 MacroAssembler masm(&cbuf);
1890 1875 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891 1876 }
1892 1877 %}
1893 1878
1894 1879 enc_class preserve_SP %{
1895 1880 debug_only(int off0 = cbuf.insts_size());
1896 1881 MacroAssembler _masm(&cbuf);
1897 1882 // RBP is preserved across all calls, even compiled calls.
1898 1883 // Use it to preserve RSP in places where the callee might change the SP.
1899 1884 __ movptr(rbp_mh_SP_save, rsp);
1900 1885 debug_only(int off1 = cbuf.insts_size());
1901 1886 assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
1902 1887 %}
1903 1888
1904 1889 enc_class restore_SP %{
1905 1890 MacroAssembler _masm(&cbuf);
1906 1891 __ movptr(rsp, rbp_mh_SP_save);
1907 1892 %}
1908 1893
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if ( !_method ) {
      // Runtime stub call (no Java target method): plain runtime relocation.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      // Statically bound virtual call: distinct relocation so it can be
      // patched like a virtual site.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}
1928 1913
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    // Placeholder MOV EAX, non_oop_word with an oop-immediate relocation;
    // the inline-cache machinery patches the immediate later --
    // NOTE(review): patching is done by IC code elsewhere; confirm.
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.  The relocation records the address of the
    // placeholder MOV so the two can be patched as a pair.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}
1944 1929
1945 1930 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1946 1931 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1947 1932 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1948 1933
1949 1934 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1950 1935 cbuf.set_insts_mark();
1951 1936 $$$emit8$primary;
1952 1937 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1953 1938 emit_d8(cbuf, disp); // Displacement
1954 1939
1955 1940 %}
1956 1941
1957 1942 enc_class Xor_Reg (eRegI dst) %{
1958 1943 emit_opcode(cbuf, 0x33);
1959 1944 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1960 1945 %}
1961 1946
1962 1947 // Following encoding is no longer used, but may be restored if calling
1963 1948 // convention changes significantly.
1964 1949 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1965 1950 //
1966 1951 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1967 1952 // // int ic_reg = Matcher::inline_cache_reg();
1968 1953 // // int ic_encode = Matcher::_regEncode[ic_reg];
1969 1954 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1970 1955 // // int imo_encode = Matcher::_regEncode[imo_reg];
1971 1956 //
1972 1957 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1973 1958 // // // so we load it immediately before the call
1974 1959 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1975 1960 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1976 1961 //
1977 1962 // // xor rbp,ebp
1978 1963 // emit_opcode(cbuf, 0x33);
1979 1964 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1980 1965 //
1981 1966 // // CALL to interpreter.
1982 1967 // cbuf.set_insts_mark();
1983 1968 // $$$emit8$primary;
1984 1969 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1985 1970 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1986 1971 // %}
1987 1972
1988 1973 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1989 1974 $$$emit8$primary;
1990 1975 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1991 1976 $$$emit8$shift$$constant;
1992 1977 %}
1993 1978
1994 1979 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
1995 1980 // Load immediate does not have a zero or sign extended version
1996 1981 // for 8-bit immediates
1997 1982 emit_opcode(cbuf, 0xB8 + $dst$$reg);
1998 1983 $$$emit32$src$$constant;
1999 1984 %}
2000 1985
2001 1986 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
2002 1987 // Load immediate does not have a zero or sign extended version
2003 1988 // for 8-bit immediates
2004 1989 emit_opcode(cbuf, $primary + $dst$$reg);
2005 1990 $$$emit32$src$$constant;
2006 1991 %}
2007 1992
2008 1993 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
2009 1994 // Load immediate does not have a zero or sign extended version
2010 1995 // for 8-bit immediates
2011 1996 int dst_enc = $dst$$reg;
2012 1997 int src_con = $src$$constant & 0x0FFFFFFFFL;
2013 1998 if (src_con == 0) {
2014 1999 // xor dst, dst
2015 2000 emit_opcode(cbuf, 0x33);
2016 2001 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2017 2002 } else {
2018 2003 emit_opcode(cbuf, $primary + dst_enc);
2019 2004 emit_d32(cbuf, src_con);
2020 2005 }
2021 2006 %}
2022 2007
2023 2008 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2024 2009 // Load immediate does not have a zero or sign extended version
2025 2010 // for 8-bit immediates
2026 2011 int dst_enc = $dst$$reg + 2;
2027 2012 int src_con = ((julong)($src$$constant)) >> 32;
2028 2013 if (src_con == 0) {
↓ open down ↓ |
646 lines elided |
↑ open up ↑ |
2029 2014 // xor dst, dst
2030 2015 emit_opcode(cbuf, 0x33);
2031 2016 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2032 2017 } else {
2033 2018 emit_opcode(cbuf, $primary + dst_enc);
2034 2019 emit_d32(cbuf, src_con);
2035 2020 }
2036 2021 %}
2037 2022
2038 2023
2039 - enc_class LdImmD (immD src) %{ // Load Immediate
2040 - if( is_positive_zero_double($src$$constant)) {
2041 - // FLDZ
2042 - emit_opcode(cbuf,0xD9);
2043 - emit_opcode(cbuf,0xEE);
2044 - } else if( is_positive_one_double($src$$constant)) {
2045 - // FLD1
2046 - emit_opcode(cbuf,0xD9);
2047 - emit_opcode(cbuf,0xE8);
2048 - } else {
2049 - emit_opcode(cbuf,0xDD);
2050 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2051 - emit_double_constant(cbuf, $src$$constant);
2052 - }
2053 - %}
2054 -
2055 -
2056 - enc_class LdImmF (immF src) %{ // Load Immediate
2057 - if( is_positive_zero_float($src$$constant)) {
2058 - emit_opcode(cbuf,0xD9);
2059 - emit_opcode(cbuf,0xEE);
2060 - } else if( is_positive_one_float($src$$constant)) {
2061 - emit_opcode(cbuf,0xD9);
2062 - emit_opcode(cbuf,0xE8);
2063 - } else {
2064 - $$$emit8$primary;
2065 - // Load immediate does not have a zero or sign extended version
2066 - // for 8-bit immediates
2067 - // First load to TOS, then move to dst
2068 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2069 - emit_float_constant(cbuf, $src$$constant);
2070 - }
2071 - %}
2072 -
2073 - enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
2074 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2075 - emit_float_constant(cbuf, $con$$constant);
2076 - %}
2077 -
2078 - enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
2079 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2080 - emit_double_constant(cbuf, $con$$constant);
2081 - %}
2082 -
2083 - enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2084 - // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2085 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2086 - emit_opcode(cbuf, 0x0F);
2087 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2088 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2089 - emit_double_constant(cbuf, $con$$constant);
2090 - %}
2091 -
2092 - enc_class Opc_MemImm_F(immF src) %{
2093 - cbuf.set_insts_mark();
2094 - $$$emit8$primary;
2095 - emit_rm(cbuf, 0x0, $secondary, 0x5);
2096 - emit_float_constant(cbuf, $src$$constant);
2097 - %}
2098 -
2099 -
2100 2024 enc_class MovI2X_reg(regX dst, eRegI src) %{
2101 2025 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2102 2026 emit_opcode(cbuf, 0x0F );
2103 2027 emit_opcode(cbuf, 0x6E );
2104 2028 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2105 2029 %}
2106 2030
2107 2031 enc_class MovX2I_reg(eRegI dst, regX src) %{
2108 2032 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2109 2033 emit_opcode(cbuf, 0x0F );
2110 2034 emit_opcode(cbuf, 0x7E );
2111 2035 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2112 2036 %}
2113 2037
2114 2038 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2115 2039 { // MOVD $dst,$src.lo
2116 2040 emit_opcode(cbuf,0x66);
2117 2041 emit_opcode(cbuf,0x0F);
2118 2042 emit_opcode(cbuf,0x6E);
2119 2043 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2120 2044 }
2121 2045 { // MOVD $tmp,$src.hi
2122 2046 emit_opcode(cbuf,0x66);
2123 2047 emit_opcode(cbuf,0x0F);
2124 2048 emit_opcode(cbuf,0x6E);
2125 2049 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2126 2050 }
2127 2051 { // PUNPCKLDQ $dst,$tmp
2128 2052 emit_opcode(cbuf,0x66);
2129 2053 emit_opcode(cbuf,0x0F);
2130 2054 emit_opcode(cbuf,0x62);
2131 2055 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2132 2056 }
2133 2057 %}
2134 2058
2135 2059 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2136 2060 { // MOVD $dst.lo,$src
2137 2061 emit_opcode(cbuf,0x66);
2138 2062 emit_opcode(cbuf,0x0F);
2139 2063 emit_opcode(cbuf,0x7E);
2140 2064 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2141 2065 }
2142 2066 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2143 2067 emit_opcode(cbuf,0xF2);
2144 2068 emit_opcode(cbuf,0x0F);
2145 2069 emit_opcode(cbuf,0x70);
2146 2070 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2147 2071 emit_d8(cbuf, 0x4E);
2148 2072 }
2149 2073 { // MOVD $dst.hi,$tmp
2150 2074 emit_opcode(cbuf,0x66);
2151 2075 emit_opcode(cbuf,0x0F);
2152 2076 emit_opcode(cbuf,0x7E);
2153 2077 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2154 2078 }
2155 2079 %}
2156 2080
2157 2081
2158 2082 // Encode a reg-reg copy. If it is useless, then empty encoding.
2159 2083 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2160 2084 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2161 2085 %}
2162 2086
2163 2087 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2164 2088 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2165 2089 %}
2166 2090
2167 2091 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
2168 2092 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2169 2093 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2170 2094 %}
2171 2095
2172 2096 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2173 2097 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2174 2098 %}
2175 2099
2176 2100 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2177 2101 $$$emit8$primary;
2178 2102 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2179 2103 %}
2180 2104
2181 2105 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2182 2106 $$$emit8$secondary;
2183 2107 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2184 2108 %}
2185 2109
2186 2110 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2187 2111 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2188 2112 %}
2189 2113
2190 2114 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2191 2115 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2192 2116 %}
2193 2117
2194 2118 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2195 2119 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2196 2120 %}
2197 2121
2198 2122 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2199 2123 // Output immediate
2200 2124 $$$emit32$src$$constant;
2201 2125 %}
2202 2126
2203 2127 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2204 2128 // Output Float immediate bits
2205 2129 jfloat jf = $src$$constant;
2206 2130 int jf_as_bits = jint_cast( jf );
2207 2131 emit_d32(cbuf, jf_as_bits);
2208 2132 %}
2209 2133
2210 2134 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2211 2135 // Output Float immediate bits
2212 2136 jfloat jf = $src$$constant;
2213 2137 int jf_as_bits = jint_cast( jf );
2214 2138 emit_d32(cbuf, jf_as_bits);
2215 2139 %}
2216 2140
2217 2141 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2218 2142 // Output immediate
2219 2143 $$$emit16$src$$constant;
2220 2144 %}
2221 2145
2222 2146 enc_class Con_d32(immI src) %{
2223 2147 emit_d32(cbuf,$src$$constant);
2224 2148 %}
2225 2149
2226 2150 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2227 2151 // Output immediate memory reference
2228 2152 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2229 2153 emit_d32(cbuf, 0x00);
2230 2154 %}
2231 2155
  enc_class lock_prefix( ) %{
    // Emit the 0xF0 LOCK prefix only on multi-processor systems; on UP the
    // atomicity of the following instruction is already guaranteed.
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2236 2160
  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx  (0x87 /r with EBX,ECX) -- put the high word where
    // CMPXCHG8B expects it (ECX).
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock] prefix, MP only
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]  (0x0F 0xC7 /1 = CMPXCHG8B m64)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  -- restore the original register assignment
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}
2258 2182
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // Atomic 32-bit compare-and-exchange against [mem_ptr].
    // [Lock] prefix, MP only
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]  (0x0F 0xB1 /r, implicit EAX as the compare value)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2269 2193
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    // Materialize ZF into a boolean register: res = (ZF set) ? 1 : 0.
    // Uses a 5-byte skip over the second MOV (0xB8+r imm32 is 5 bytes).
    int res_encoding = $res$$reg;

    // MOV  res,0   (does not affect flags)
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail  (jump over the MOV res,1 below)
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}
2284 2208
  enc_class set_instruction_start( ) %{
    // Record the current code-buffer position so relocation info attached to
    // a following memory operand points at the start of the instruction.
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2288 2212
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    // General reg<->mem addressing-mode encoding: decompose the memory
    // operand into base/index/scale/disp and let encode_RegMem build the
    // mod/rm (+SIB, +disp) bytes.  Displacement may be an oop (needs reloc).
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2298 2222
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    // Like RegMem but addresses the HIGH 32-bit half of a long: uses the
    // high register of the pair and bumps the displacement by 4 bytes.
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2308 2232
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    // Shift a long by 1..31: a double shift (0x0F $tertiary = SHLD/SHRD imm8)
    // moves bits between the halves, then $primary/$secondary shift the
    // remaining half.  $tertiary == 0xA4 selects the left-shift operand order
    // (lo into hi); otherwise hi into lo — NOTE(review): operand-order choice
    // presumed from the 0xA4 (SHLD) test, confirm against the instructs using this.
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);              // double-shift opcode prefix
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);              // shift remaining half by the same count
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}
2321 2245
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    // Arithmetic right shift of a long by 32..63: the result's low word is the
    // old high word (shifted), and the high word becomes the sign (SAR by 31).
    emit_opcode( cbuf, 0x8B );           // Move lo <- hi
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) {          // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);              // hi = sign-extension: shift by 31
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}
2334 2258
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    // Logical shift (left or right) of a long by 32..63: move one half into
    // the other, shift it by cnt-32, and zero the vacated half.  The
    // $secondary sub-opcode selects direction (0x5 = SHR per the shift group
    // encoding — NOTE(review): presumed, confirm against the using instructs).
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B );           // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) {          // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);              // XOR r2,r2 -- clear vacated half
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2350 2274
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    // disp_for_half is 0 or 4 to select the low or high 32-bit half.
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;            // never an oop: plain double data
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2363 2287
  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;   // sub-opcode goes in the reg field
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}
2378 2302
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    // Like RMopc_Mem_no_oop but the displacement MAY be an oop (static
    // globals), so relocation info is allowed.
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();      // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}
2388 2312
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    // Encode the addressing-mode bytes of LEA dst,[src0+src1]
    // (opcode emitted elsewhere): no index, no scale, constant displacement.
    int reg_encoding = $dst$$reg;
    int base = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index = 0x04;           // 0x04 indicates no index
    int scale = 0x00;           // 0x00 indicates no scale
    int displace = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2398 2322
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // dst = min(dst, src): compare, then conditionally skip the 2-byte move
    // with a short JL (0x7C) when dst is already the smaller value.
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2410 2334
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // dst = max(dst, src): same shape as min_enc but skips with JG (0x7F).
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2422 2346
  enc_class enc_FP_store(memory mem, regD src) %{
    // Store an x87 register to memory.
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2;              // sub-opcode: plain store (FST)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;                // sub-opcode: store & pop (FSTP)
      emit_opcode( cbuf, 0xD9 );         // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2441 2365
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst  (0xF7 /3 = two's-complement negate)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}
2447 2371
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst  (0x0F 0x9C = SETL r/m8: dst's low byte = 1 if SF != OF).
    // Operand must be a byte-addressable register, hence the eCXRegI class.
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2454 2378
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    // Branch-free conditional add: p -= q; tmp = (p<q before sub) ? -1 : 0
    // via SBB tmp,tmp; then p += (tmp & y).
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  -- tmp = 0 - CF, i.e. all-ones iff the SUB borrowed
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2471 2395
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    // Same branch-free conditional add as enc_cmpLTP, but the AND mask (y)
    // comes from memory instead of a register.
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  -- all-ones iff the SUB borrowed
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2495 2419
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // Variable left shift of a long by CL (0..63).  If bit 5 of the count is
    // set (shift >= 32), pre-move lo into hi and clear lo; then the
    // SHLD/SHL pair handles the remaining 0..31 bits (x86 masks CL to 5 bits).
    // The JE skips exactly the 4 bytes of the MOV+XOR pair.
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL  $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2519 2443
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // Variable logical right shift of a long by CL: mirror of shift_left_long
    // (hi moves into lo and hi is cleared when shift >= 32), finished with
    // SHRD (0x0F 0xAD) plus SHR by CL (0xD3 /5).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2543 2467
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // Variable arithmetic right shift of a long by CL: like shift_right_long
    // but when shift >= 32 the high half becomes the sign (SAR hi,31).
    // The JE skip is 5 bytes: MOV (2) + SAR imm8 (3).
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2568 2492
2569 2493
2570 2494 // ----------------- Encodings for floating point unit -----------------
2571 2495 // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    // x87 arithmetic with a stack register: $primary opcode byte, then a
    // mod/rm whose reg field is the $secondary sub-opcode.
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}
2576 2500
  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );  // FSTP ST(0): 0xDD 0xD8 discards the top of stack
    emit_d8( cbuf, 0xD8 );
  %}
2582 2506
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i): store TOS into ST(i) and pop
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2588 2512
  enc_class Push_Reg_D( regD dst ) %{
    // Push ST(i-1) onto the FPU stack (dst becomes the new TOS copy).
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
2593 2517
  enc_class strictfp_bias1( regD dst ) %{
    // Multiply dst by the first strictfp scaling constant (an 80-bit real
    // loaded from a StubRoutines address) to pre-bias against double rounding
    // of subnormals — NOTE(review): purpose inferred from the stub name,
    // confirm against StubRoutines::addr_fpu_subnormal_bias1.
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2601 2525
  enc_class strictfp_bias2( regD dst ) %{
    // Companion to strictfp_bias1: multiply dst by the second scaling
    // constant to undo the bias after the strictfp operation.
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2609 2533
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Delegates to the helper: $primary opcode, reg field = src, [ESP+disp].
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}
2614 2538
  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{    // RegSS
    // Opcode already emitted by a prior encoding; emit [ESP+disp32] operand:
    // mod=10 with r/m=ESP forces a SIB byte; SIB ESP/ESP means base=ESP, no index.
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}
2622 2546
  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    // $primary/$secondary carry the FILD opcode and sub-opcode.
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}
2627 2551
  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}
2632 2556
  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}
2637 2561
  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{    // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}
2642 2566
  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{    // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}
2648 2572
  enc_class Pop_Reg_F( regF dst ) %{
    // Store TOS into ST(i) and pop the FPU stack.
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2653 2577
  enc_class Push_Reg_F( regF dst ) %{
    // Duplicate ST(i-1) onto the top of the FPU stack.
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2658 2582
  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    // FST if src is already at TOS (FPR1); otherwise FLD it first and use
    // the popping form (FSTP) so stack depth is unchanged either way.
    int pop = 0x02;                      // sub-opcode: FST (store, no pop)
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // sub-opcode: FSTP (store & pop)
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}
2669 2593
  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    // Double-width twin of Pop_Mem_Reg_F (0xDD opcode instead of 0xD9).
    int pop = 0x02;                      // FST
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}
2680 2604
  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    // Reg-to-reg x87 move: FST ST(i) when src is TOS, else FLD src then
    // FSTP ST(i).  The base byte encodes FST (0xD0+i) vs FSTP (0xD8+i).
    int pop = 0xD0 - 1;                  // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2692 2616
2693 2617
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    // dst = src + src1*src2, computed on the x87 stack via MacroAssembler.
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0);   // value at TOS
    masm.fadd( $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);  // value at TOS, popped off after store
  %}
2701 2625
2702 2626
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // Load dst onto TOS, then (if src is not already FPR1) rotate the stack
    // pointer up one (fincstp), FXCH the slot holding src with the slot just
    // below the new TOS, and rotate back (fdecstp) — net effect: src sits
    // directly under the freshly pushed dst.
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2720 2644
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Move two XMM doubles onto the x87 stack (for FPREM-style ops):
    // spill each through an 8-byte scratch slot at [ESP] and FLD_D it.
    // src1 is pushed first, so src0 ends up on top.
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2744 2668
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Single-precision twin of Push_ModD_encoding: 4-byte scratch slot,
    // MOVSS spill (0xF3 0x0F 0x11) and FLD (0xD9) for each source.
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2768 2692
  enc_class Push_ResultXD(regXD dst) %{
    // Move the x87 TOS result back into an XMM register: FSTP to the scratch
    // slot at [ESP], load it into dst, then release the 8-byte slot.
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}
2782 2706
  enc_class Push_ResultX(regX dst, immI d8) %{
    // Single-precision version of Push_ResultXD; the caller supplies the
    // stack-cleanup amount (d8 = 4 or 8) as an immediate operand.
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}
2795 2719
  enc_class Push_SrcXD(regXD src) %{
    // Push one XMM double onto the x87 stack via an 8-byte [ESP] scratch
    // slot (the slot is NOT released here; see pop_stack_temp_qword).
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2810 2734
  enc_class push_stack_temp_qword() %{
    // Reserve an 8-byte scratch slot on the native stack.
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}
2816 2740
  enc_class pop_stack_temp_qword() %{
    // Release the 8-byte scratch slot reserved by push_stack_temp_qword.
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}
2822 2746
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    // Spill an XMM double to the existing [ESP] scratch slot and FLD it onto
    // the x87 stack (assumes the slot was already reserved by the caller).
    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2832 2756
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  enc_class pow_exp_core_encoding %{
    // Splits Q = Y*log2(X) into int(Q) + frac(Q); computes 2^frac(Q) with
    // F2XM1 and builds 2^int(Q) by constructing a double's exponent field in
    // integer registers, then multiplies the two.  Register names in the
    // comments below say rax/rcx/rbx but the encodings are the 32-bit
    // EAX/ECX/EBX registers.  Overflow of the biased exponent is detected
    // with the 0xFFFFF800 mask and turned into a NaN-producing pattern.
    // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2871 2795
  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
  enc_class Push_Result_Mod_D( regD src) %{
    // If src is not FPR1, rotate the x87 stack (fincstp/FXCH/fdecstp) so the
    // result under TOS ends up in src's slot; a following Pop_Reg_F or
    // Pop_Mem_F then stores/pops it.
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2892 2816
  enc_class fnstsw_sahf_skip_parity() %{
    // Copy x87 status flags into EFLAGS (FNSTSW AX + SAHF), then JNP over
    // the next 5 bytes when the comparison was ordered (no NaN).
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2903 2827
  enc_class emitModD() %{
    // x87 FPREM loop: FPREM only reduces partially, so repeat until the C2
    // status bit (reflected into PF by SAHF) says the reduction is complete.
    // fprem must be iterative
    //   :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop  (near JP with rel32 = -12, back to the FPREM)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}
2925 2849
  enc_class fpu_flags() %{
    // Move x87 compare results into EFLAGS, forcing the unordered (NaN) case
    // to look like "less than" by setting AH's carry bit before SAHF.
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400  -- C2/unordered bit of the x87 status word
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2948 2872
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2963 2887
2964 2888 // fnstsw_ax();
2965 2889 // sahf();
2966 2890 // movl(dst, nan_result);
2967 2891 // jcc(Assembler::parity, exit);
2968 2892 // movl(dst, less_result);
2969 2893 // jcc(Assembler::below, exit);
2970 2894 // movl(dst, equal_result);
2971 2895 // jcc(Assembler::equal, exit);
2972 2896 // movl(dst, greater_result);
2973 2897
2974 2898 // less_result = 1;
2975 2899 // greater_result = -1;
2976 2900 // equal_result = 0;
2977 2901 // nan_result = -1;
2978 2902
  enc_class CmpF_Result(eRegI dst) %{
    // Turn an x87 comparison into -1/0/1 in dst, mapping NaN to -1.  The
    // chained MOV-imm32 (5 bytes) + Jcc-short pairs fall through to "exit";
    // see the pseudocode comments above this enc_class for the flow.
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
3007 2931
3008 2932
3009 2933 // XMM version of CmpF_Result. Because the XMM compare
3010 2934 // instructions set the EFLAGS directly. It becomes simpler than
3011 2935 // the float version above.
3012 2936 enc_class CmpX_Result(eRegI dst) %{
3013 2937 MacroAssembler _masm(&cbuf);
3014 2938 Label nan, inc, done;
3015 2939
3016 2940 __ jccb(Assembler::parity, nan);
3017 2941 __ jccb(Assembler::equal, done);
3018 2942 __ jccb(Assembler::above, inc);
3019 2943 __ bind(nan);
3020 2944 __ decrement(as_Register($dst$$reg)); // NO L qqq
3021 2945 __ jmpb(done);
3022 2946 __ bind(inc);
3023 2947 __ increment(as_Register($dst$$reg)); // NO L qqq
3024 2948 __ bind(done);
3025 2949 %}
3026 2950
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // Compares the high words first and only falls through to the low-word
    // compare on equality.  Marked BROKEN above: the final flags do not
    // represent a full signed 64-bit comparison — do not use as-is.
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}
3041 2965
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // Sign-extend a 32-bit int into a long register pair: copy src into both
    // halves, then SAR the high half by 31 to replicate the sign bit.
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
3054 2978
// Convert a 64-bit long to a floating-point value: push hi then lo so
// the full 64-bit value lies at [ESP], FILD it onto the x87 stack, and
// repair ESP. The result is left in ST(0).
3055 2979 enc_class convert_long_double( eRegL src ) %{
3056 2980 // push $src.hi
3057 2981 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3058 2982 // push $src.lo
3059 2983 emit_opcode(cbuf, 0x50+$src$$reg );
3060 2984 // fild 64-bits at [SP]
3061 2985 emit_opcode(cbuf,0xdf); // DF /5 = FILD m64int
3062 2986 emit_d8(cbuf, 0x6C); // ModRM: disp8 + SIB
3063 2987 emit_d8(cbuf, 0x24); // SIB: base = ESP
3064 2988 emit_d8(cbuf, 0x00); // disp8 = 0
3065 2989 // pop stack
3066 2990 emit_opcode(cbuf, 0x83); // add SP, #8
3067 2991 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3068 2992 emit_d8(cbuf, 0x8);
3069 2993 %}
3070 2994
// Multiply-high by a constant power pattern: IMUL EDX:EAX by $src1,
// then arithmetic-shift EDX right by ($cnt - 32) to extract the high
// bits. When $cnt == 32 the shift count is 0 and no SAR is emitted.
3071 2995 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
3072 2996 // IMUL EDX:EAX,$src1
3073 2997 emit_opcode( cbuf, 0xF7 );
3074 2998 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); // F7 /5 = IMUL (one-operand)
3075 2999 // SAR EDX,$cnt-32
3076 3000 int shift_count = ((int)$cnt$$constant) - 32;
3077 3001 if (shift_count > 0) {
3078 3002 emit_opcode(cbuf, 0xC1);
3079 3003 emit_rm(cbuf, 0x3, 7, $dst$$reg ); // /7 = SAR
3080 3004 emit_d8(cbuf, shift_count);
3081 3005 }
3082 3006 %}
3083 3007
3084 3008 // this version doesn't have add sp, 8
// Same as convert_long_double but leaves the pushed 8 bytes on the
// stack: the instruct pattern using this encoding is responsible for
// the stack repair. Result is left in ST(0).
3085 3009 enc_class convert_long_double2( eRegL src ) %{
3086 3010 // push $src.hi
3087 3011 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3088 3012 // push $src.lo
3089 3013 emit_opcode(cbuf, 0x50+$src$$reg );
3090 3014 // fild 64-bits at [SP]
3091 3015 emit_opcode(cbuf,0xdf); // DF /5 = FILD m64int
3092 3016 emit_d8(cbuf, 0x6C); // ModRM: disp8 + SIB
3093 3017 emit_d8(cbuf, 0x24); // SIB: base = ESP
3094 3018 emit_d8(cbuf, 0x00); // disp8 = 0
3095 3019 %}
3096 3020
// Signed widening multiply: EDX:EAX = EAX * $src (one-operand IMUL,
// EAX implicit). $dst is constrained to EDX:EAX by eADXRegL.
3097 3021 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
3098 3022 // Basic idea: long = (long)int * (long)int
3099 3023 // IMUL EDX:EAX, src
3100 3024 emit_opcode( cbuf, 0xF7 );
3101 3025 emit_rm( cbuf, 0x3, 0x5, $src$$reg); // F7 /5 = IMUL
3102 3026 %}
3103 3027
// Unsigned widening multiply: EDX:EAX = EAX * $src (one-operand MUL,
// EAX implicit). $dst is constrained to EDX:EAX by eADXRegL.
3104 3028 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
3105 3029 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
3106 3030 // MUL EDX:EAX, src
3107 3031 emit_opcode( cbuf, 0xF7 );
3108 3032 emit_rm( cbuf, 0x3, 0x4, $src$$reg); // F7 /4 = MUL
3109 3033 %}
3110 3034
// Full 64x64->64 multiply. $dst is EDX:EAX (eADXRegL), so "EDX" below
// is $dst.hi and "EAX" is $dst.lo. $tmp accumulates the cross terms
// (x_hi*y_lo + x_lo*y_hi) which are then folded into the high half of
// the widening MUL of the low words.
3111 3035 enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
3112 3036 // Basic idea: lo(result) = lo(x_lo * y_lo)
3113 3037 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
3114 3038 // MOV $tmp,$src.lo
3115 3039 encode_Copy( cbuf, $tmp$$reg, $src$$reg );
3116 3040 // IMUL $tmp,EDX
3117 3041 emit_opcode( cbuf, 0x0F );
3118 3042 emit_opcode( cbuf, 0xAF ); // 0F AF = two-operand IMUL r32, r/m32
3119 3043 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3120 3044 // MOV EDX,$src.hi
3121 3045 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
3122 3046 // IMUL EDX,EAX
3123 3047 emit_opcode( cbuf, 0x0F );
3124 3048 emit_opcode( cbuf, 0xAF );
3125 3049 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
3126 3050 // ADD $tmp,EDX
3127 3051 emit_opcode( cbuf, 0x03 );
3128 3052 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3129 3053 // MUL EDX:EAX,$src.lo
3130 3054 emit_opcode( cbuf, 0xF7 ); // F7 /4 = MUL (widening, EAX implicit)
3131 3055 emit_rm( cbuf, 0x3, 0x4, $src$$reg );
3132 3056 // ADD EDX,$tmp (fold cross terms into the high half)
3133 3057 emit_opcode( cbuf, 0x03 );
3134 3058 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
3135 3059 %}
3136 3060
// 64-bit multiply by a small (8-bit immediate) constant $src.
// $dst is EDX:EAX; $tmp holds the cross term src * y_hi, which is
// added into EDX after the widening MUL of src * y_lo.
3137 3061 enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
3138 3062 // Basic idea: lo(result) = lo(src * y_lo)
3139 3063 // hi(result) = hi(src * y_lo) + lo(src * y_hi)
3140 3064 // IMUL $tmp,EDX,$src
3141 3065 emit_opcode( cbuf, 0x6B ); // 6B /r ib = IMUL r32, r/m32, imm8
3142 3066 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3143 3067 emit_d8( cbuf, (int)$src$$constant );
3144 3068 // MOV EDX,$src
3145 3069 emit_opcode(cbuf, 0xB8 + EDX_enc); // B8+r = MOV r32, imm32
3146 3070 emit_d32( cbuf, (int)$src$$constant );
3147 3071 // MUL EDX:EAX,EDX
3148 3072 emit_opcode( cbuf, 0xF7 );
3149 3073 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); // F7 /4 = MUL
3150 3074 // ADD EDX,$tmp (fold cross term into the high half)
3151 3075 emit_opcode( cbuf, 0x03 );
3152 3076 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
3153 3077 %}
3154 3078
// 64-bit signed division via a runtime call to SharedRuntime::ldiv.
// Pushes both operands (hi then lo, so each long is little-endian in
// memory), calls out, then pops the 16 bytes of arguments.
// NOTE(review): HIGH_FROM_LOW applied to the opcode byte (0x50+reg)
// works only because the macro simply offsets the register encoding --
// confirm against the macro's definition earlier in this file.
// NOTE(review): result is presumably returned in EDX:EAX per the C
// calling convention -- verify against SharedRuntime::ldiv.
3155 3079 enc_class long_div( eRegL src1, eRegL src2 ) %{
3156 3080 // PUSH src1.hi
3157 3081 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
3158 3082 // PUSH src1.lo
3159 3083 emit_opcode(cbuf, 0x50+$src1$$reg );
3160 3084 // PUSH src2.hi
3161 3085 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
3162 3086 // PUSH src2.lo
3163 3087 emit_opcode(cbuf, 0x50+$src2$$reg );
3164 3088 // CALL directly to the runtime
3165 3089 cbuf.set_insts_mark();
3166 3090 emit_opcode(cbuf,0xE8); // Call into runtime
3167 3091 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3168 3092 // Restore stack
3169 3093 emit_opcode(cbuf, 0x83); // add SP, #framesize
3170 3094 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3171 3095 emit_d8(cbuf, 4*4); // pop the four pushed words
3172 3096 %}
3173 3097
// 64-bit signed remainder via a runtime call to SharedRuntime::lrem.
// Identical calling sequence to long_div above: push both operands,
// call out, pop the 16 bytes of arguments.
3174 3098 enc_class long_mod( eRegL src1, eRegL src2 ) %{
3175 3099 // PUSH src1.hi
3176 3100 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
3177 3101 // PUSH src1.lo
3178 3102 emit_opcode(cbuf, 0x50+$src1$$reg );
3179 3103 // PUSH src2.hi
3180 3104 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
3181 3105 // PUSH src2.lo
3182 3106 emit_opcode(cbuf, 0x50+$src2$$reg );
3183 3107 // CALL directly to the runtime
3184 3108 cbuf.set_insts_mark();
3185 3109 emit_opcode(cbuf,0xE8); // Call into runtime
3186 3110 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3187 3111 // Restore stack
3188 3112 emit_opcode(cbuf, 0x83); // add SP, #framesize
3189 3113 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3190 3114 emit_d8(cbuf, 4*4); // pop the four pushed words
3191 3115 %}
3192 3116
// Test a long against zero: tmp = src.lo | src.hi, so ZF is set iff
// the full 64-bit value is zero. Clobbers $tmp.
3193 3117 enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
3194 3118 // MOV $tmp,$src.lo
3195 3119 emit_opcode(cbuf, 0x8B);
3196 3120 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
3197 3121 // OR $tmp,$src.hi
3198 3122 emit_opcode(cbuf, 0x0B);
3199 3123 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
3200 3124 %}
3201 3125
// Long equality compare: CMP the low words; if they differ, skip the
// high-word CMP so ZF stays clear. Only ZF (EQ/NE) is meaningful
// afterwards -- the ordered flags are not valid for a 64-bit compare.
3202 3126 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
3203 3127 // CMP $src1.lo,$src2.lo
3204 3128 emit_opcode( cbuf, 0x3B );
3205 3129 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3206 3130 // JNE,s skip
3207 3131 emit_cc(cbuf, 0x70, 0x5); // Jcc with cc=5 (NE), short form
3208 3132 emit_d8(cbuf,2); // skip the 2-byte CMP below
3209 3133 // CMP $src1.hi,$src2.hi
3210 3134 emit_opcode( cbuf, 0x3B );
3211 3135 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
3212 3136 %}
3213 3137
// Long ordered compare via the classic CMP/SBB idiom: compare the low
// words, then subtract-with-borrow the high words into $tmp. The
// resulting flags reflect a signed 64-bit comparison of src1 vs src2.
// Clobbers $tmp.
3214 3138 enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
3215 3139 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
3216 3140 emit_opcode( cbuf, 0x3B );
3217 3141 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3218 3142 // MOV $tmp,$src1.hi
3219 3143 emit_opcode( cbuf, 0x8B );
3220 3144 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
3221 3145 // SBB $tmp,$src2.hi\t! Compute flags for long compare
3222 3146 emit_opcode( cbuf, 0x1B );
3223 3147 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
3224 3148 %}
3225 3149
// Compare zero against a long (flags for 0 cmp $src): zero $tmp, then
// CMP/SBB against src.lo and src.hi. Same CMP/SBB idiom as
// long_cmp_flags2 with an implicit zero as the left operand.
// Clobbers $tmp.
3226 3150 enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
3227 3151 // XOR $tmp,$tmp
3228 3152 emit_opcode(cbuf,0x33); // XOR
3229 3153 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3230 3154 // CMP $tmp,$src.lo
3231 3155 emit_opcode( cbuf, 0x3B );
3232 3156 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3233 3157 // SBB $tmp,$src.hi
3234 3158 emit_opcode( cbuf, 0x1B );
3235 3159 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3236 3160 %}
3237 3161
3238 3162 // Sniff, sniff... smells like Gnu Superoptimizer
// Two's-complement negate of a 64-bit value in place: NEG both halves,
// then SBB 0 from the high half to propagate the borrow generated by
// negating a non-zero low half.
3239 3163 enc_class neg_long( eRegL dst ) %{
3240 3164 emit_opcode(cbuf,0xF7); // NEG hi
3241 3165 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); // F7 /3 = NEG
3242 3166 emit_opcode(cbuf,0xF7); // NEG lo
3243 3167 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3244 3168 emit_opcode(cbuf,0x83); // SBB hi,0
3245 3169 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); // 83 /3 = SBB imm8
3246 3170 emit_d8 (cbuf,0 );
3247 3171 %}
3248 3172
// Load 64 bits from memory into an XMM register (MOVQ xmm, m64).
3249 3173 enc_class movq_ld(regXD dst, memory mem) %{
3250 3174 MacroAssembler _masm(&cbuf);
3251 3175 __ movq($dst$$XMMRegister, $mem$$Address);
3252 3176 %}
3253 3177
// Store the low 64 bits of an XMM register to memory (MOVQ m64, xmm).
3254 3178 enc_class movq_st(memory mem, regXD src) %{
3255 3179 MacroAssembler _masm(&cbuf);
3256 3180 __ movq($mem$$Address, $src$$XMMRegister);
3257 3181 %}
3258 3182
// Replicate a byte across lanes: copy src to dst, interleave dst with
// itself (punpcklbw widens bytes to words), then broadcast the low
// word across the low quadword with pshuflw imm 0x00.
3259 3183 enc_class pshufd_8x8(regX dst, regX src) %{
3260 3184 MacroAssembler _masm(&cbuf);
3261 3185
3262 3186 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3263 3187 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3264 3188 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3265 3189 %}
3266 3190
// Broadcast the low 16-bit element of src across the low quadword of
// dst (pshuflw with shuffle immediate 0x00).
3267 3191 enc_class pshufd_4x16(regX dst, regX src) %{
3268 3192 MacroAssembler _masm(&cbuf);
3269 3193
3270 3194 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3271 3195 %}
3272 3196
// Shuffle 32-bit elements of src into dst according to the immediate
// shuffle control $mode (PSHUFD).
3273 3197 enc_class pshufd(regXD dst, regXD src, int mode) %{
3274 3198 MacroAssembler _masm(&cbuf);
3275 3199
3276 3200 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3277 3201 %}
3278 3202
// Bitwise XOR of two XMM registers (PXOR dst, src).
3279 3203 enc_class pxor(regXD dst, regXD src) %{
3280 3204 MacroAssembler _masm(&cbuf);
3281 3205
3282 3206 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3283 3207 %}
3284 3208
// Move a 32-bit GPR into the low dword of an XMM register (MOVD).
3285 3209 enc_class mov_i2x(regXD dst, eRegI src) %{
3286 3210 MacroAssembler _masm(&cbuf);
3287 3211
3288 3212 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3289 3213 %}
3290 3214
3291 3215
3292 3216 // Because the transitions from emitted code to the runtime
3293 3217 // monitorenter/exit helper stubs are so slow it's critical that
3294 3218 // we inline both the stack-locking fast-path and the inflated fast path.
3295 3219 //
3296 3220 // See also: cmpFastLock and cmpFastUnlock.
3297 3221 //
3298 3222 // What follows is a specialized inline transliteration of the code
3299 3223 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3300 3224 // another option would be to emit TrySlowEnter and TrySlowExit methods
3301 3225 // at startup-time. These methods would accept arguments as
3302 3226 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3303 3227 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3304 3228 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3305 3229 // In practice, however, the # of lock sites is bounded and is usually small.
3306 3230 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3307 3231 // if the processor uses simple bimodal branch predictors keyed by EIP
3308 3232 // Since the helper routines would be called from multiple synchronization
3309 3233 // sites.
3310 3234 //
3311 3235 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3312 3236 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3313 3237 // to those specialized methods. That'd give us a mostly platform-independent
3314 3238 // implementation that the JITs could optimize and inline at their pleasure.
3315 3239 // Done correctly, the only time we'd need to cross to native code would be
3316 3240 // to park() or unpark() threads. We'd also need a few more unsafe operators
3317 3241 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3318 3242 // (b) explicit barriers or fence operations.
3319 3243 //
3320 3244 // TODO:
3321 3245 //
3322 3246 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3323 3247 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3324 3248 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3325 3249 // the lock operators would typically be faster than reifying Self.
3326 3250 //
3327 3251 // * Ideally I'd define the primitives as:
3328 3252 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3329 3253 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3330 3254 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3331 3255 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3332 3256 // Furthermore the register assignments are overconstrained, possibly resulting in
3333 3257 // sub-optimal code near the synchronization site.
3334 3258 //
3335 3259 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3336 3260 // Alternately, use a better sp-proximity test.
3337 3261 //
3338 3262 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3339 3263 // Either one is sufficient to uniquely identify a thread.
3340 3264 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3341 3265 //
3342 3266 // * Intrinsify notify() and notifyAll() for the common cases where the
3343 3267 // object is locked by the calling thread but the waitlist is empty.
3344 3268 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3345 3269 //
3346 3270 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3347 3271 // But beware of excessive branch density on AMD Opterons.
3348 3272 //
3349 3273 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3350 3274 // or failure of the fast-path. If the fast-path fails then we pass
3351 3275 // control to the slow-path, typically in C. In Fast_Lock and
3352 3276 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3353 3277 // will emit a conditional branch immediately after the node.
3354 3278 // So we have branches to branches and lots of ICC.ZF games.
3355 3279 // Instead, it might be better to have C2 pass a "FailureLabel"
3356 3280 // into Fast_Lock and Fast_Unlock. In the case of success, control
3357 3281 // will drop through the node. ICC.ZF is undefined at exit.
3358 3282 // In the case of failure, the node will branch directly to the
3359 3283 // FailureLabel
3360 3284
3361 3285
3362 3286 // obj: object to lock
3363 3287 // box: on-stack box address (displaced header location) - KILLED
3364 3288 // rax,: tmp -- KILLED
3365 3289 // scr: tmp -- KILLED
// Emit the inlined monitor-enter fast path (see the long commentary
// above). tmp must be EAX (implicit CMPXCHG comparand); obj, box, tmp
// and scr must all be distinct -- enforced by the guarantees below.
// On exit ICC.ZF == 1 indicates success; ZF == 0 routes the caller to
// the slow path. EmitSync is a diagnostic/tuning flag whose bits
// select alternative code shapes (bit 0/1 force the slow path or the
// simple form; other bits toggle prefetch, ST-before-CAS, etc.).
3366 3290 enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
3367 3291
3368 3292 Register objReg = as_Register($obj$$reg);
3369 3293 Register boxReg = as_Register($box$$reg);
3370 3294 Register tmpReg = as_Register($tmp$$reg);
3371 3295 Register scrReg = as_Register($scr$$reg);
3372 3296
3373 3297 // Ensure the register assignments are disjoint
3374 3298 guarantee (objReg != boxReg, "") ;
3375 3299 guarantee (objReg != tmpReg, "") ;
3376 3300 guarantee (objReg != scrReg, "") ;
3377 3301 guarantee (boxReg != tmpReg, "") ;
3378 3302 guarantee (boxReg != scrReg, "") ;
3379 3303 guarantee (tmpReg == as_Register(EAX_enc), "") ;
3380 3304
3381 3305 MacroAssembler masm(&cbuf);
3382 3306
3383 3307 if (_counters != NULL) {
3384 3308 masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3385 3309 }
3386 3310 if (EmitSync & 1) {
3387 3311 // set box->dhw = unused_mark (3)
3388 3312 // Force all sync thru slow-path: slow_enter() and slow_exit()
3389 3313 masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
3390 3314 masm.cmpptr (rsp, (int32_t)0) ; // rsp != 0, so ZF=0 -> always slow path
3391 3315 } else
3392 3316 if (EmitSync & 2) {
3393 3317 Label DONE_LABEL ;
3394 3318 if (UseBiasedLocking) {
3395 3319 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3396 3320 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3397 3321 }
3398 3322
3399 3323 masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
3400 3324 masm.orptr (tmpReg, 0x1);
3401 3325 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3402 3326 if (os::is_MP()) { masm.lock(); }
3403 3327 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3404 3328 masm.jcc(Assembler::equal, DONE_LABEL);
3405 3329 // Recursive locking
3406 3330 masm.subptr(tmpReg, rsp);
3407 3331 masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3408 3332 masm.movptr(Address(boxReg, 0), tmpReg);
3409 3333 masm.bind(DONE_LABEL) ;
3410 3334 } else {
3411 3335 // Possible cases that we'll encounter in fast_lock
3412 3336 // ------------------------------------------------
3413 3337 // * Inflated
3414 3338 // -- unlocked
3415 3339 // -- Locked
3416 3340 // = by self
3417 3341 // = by other
3418 3342 // * biased
3419 3343 // -- by Self
3420 3344 // -- by other
3421 3345 // * neutral
3422 3346 // * stack-locked
3423 3347 // -- by self
3424 3348 // = sp-proximity test hits
3425 3349 // = sp-proximity test generates false-negative
3426 3350 // -- by other
3427 3351 //
3428 3352
3429 3353 Label IsInflated, DONE_LABEL, PopDone ;
3430 3354
3431 3355 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3432 3356 // order to reduce the number of conditional branches in the most common cases.
3433 3357 // Beware -- there's a subtle invariant that fetch of the markword
3434 3358 // at [FETCH], below, will never observe a biased encoding (*101b).
3435 3359 // If this invariant is not held we risk exclusion (safety) failure.
3436 3360 if (UseBiasedLocking && !UseOptoBiasInlining) {
3437 3361 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3438 3362 }
3439 3363
3440 3364 masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
3441 3365 masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
3442 3366 masm.jccb (Assembler::notZero, IsInflated) ;
3443 3367
3444 3368 // Attempt stack-locking ...
3445 3369 masm.orptr (tmpReg, 0x1);
3446 3370 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
3447 3371 if (os::is_MP()) { masm.lock(); }
3448 3372 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
3449 3373 if (_counters != NULL) {
3450 3374 masm.cond_inc32(Assembler::equal,
3451 3375 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3452 3376 }
3453 3377 masm.jccb (Assembler::equal, DONE_LABEL);
3454 3378
3455 3379 // Recursive locking
3456 3380 masm.subptr(tmpReg, rsp);
3457 3381 masm.andptr(tmpReg, 0xFFFFF003 );
3458 3382 masm.movptr(Address(boxReg, 0), tmpReg);
3459 3383 if (_counters != NULL) {
3460 3384 masm.cond_inc32(Assembler::equal,
3461 3385 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3462 3386 }
3463 3387 masm.jmp (DONE_LABEL) ;
3464 3388
3465 3389 masm.bind (IsInflated) ;
3466 3390
3467 3391 // The object is inflated.
3468 3392 //
3469 3393 // TODO-FIXME: eliminate the ugly use of manifest constants:
3470 3394 // Use markOopDesc::monitor_value instead of "2".
3471 3395 // use markOop::unused_mark() instead of "3".
3472 3396 // The tmpReg value is an objectMonitor reference ORed with
3473 3397 // markOopDesc::monitor_value (2). We can either convert tmpReg to an
3474 3398 // objectmonitor pointer by masking off the "2" bit or we can just
3475 3399 // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3476 3400 // field offsets with "-2" to compensate for and annul the low-order tag bit.
3477 3401 //
3478 3402 // I use the latter as it avoids AGI stalls.
3479 3403 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3480 3404 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3481 3405 //
3482 3406 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
3483 3407
3484 3408 // boxReg refers to the on-stack BasicLock in the current frame.
3485 3409 // We'd like to write:
3486 3410 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
3487 3411 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
3488 3412 // additional latency as we have another ST in the store buffer that must drain.
3489 3413
3490 3414 if (EmitSync & 8192) {
3491 3415 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3492 3416 masm.get_thread (scrReg) ;
3493 3417 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3494 3418 masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
3495 3419 if (os::is_MP()) { masm.lock(); }
3496 3420 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3497 3421 } else
3498 3422 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
3499 3423 masm.movptr(scrReg, boxReg) ;
3500 3424 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
3501 3425
3502 3426 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3503 3427 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3504 3428 // prefetchw [eax + Offset(_owner)-2]
3505 3429 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3506 3430 }
3507 3431
3508 3432 if ((EmitSync & 64) == 0) {
3509 3433 // Optimistic form: consider XORL tmpReg,tmpReg
3510 3434 masm.movptr(tmpReg, NULL_WORD) ;
3511 3435 } else {
3512 3436 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3513 3437 // Test-And-CAS instead of CAS
3514 3438 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3515 3439 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3516 3440 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3517 3441 }
3518 3442
3519 3443 // Appears unlocked - try to swing _owner from null to non-null.
3520 3444 // Ideally, I'd manifest "Self" with get_thread and then attempt
3521 3445 // to CAS the register containing Self into m->Owner.
3522 3446 // But we don't have enough registers, so instead we can either try to CAS
3523 3447 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
3524 3448 // we later store "Self" into m->Owner. Transiently storing a stack address
3525 3449 // (rsp or the address of the box) into m->owner is harmless.
3526 3450 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3527 3451 if (os::is_MP()) { masm.lock(); }
3528 3452 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3529 3453 masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
3530 3454 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3531 3455 masm.get_thread (scrReg) ; // beware: clobbers ICCs
3532 3456 masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
3533 3457 masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
3534 3458
3535 3459 // If the CAS fails we can either retry or pass control to the slow-path.
3536 3460 // We use the latter tactic.
3537 3461 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3538 3462 // If the CAS was successful ...
3539 3463 // Self has acquired the lock
3540 3464 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3541 3465 // Intentional fall-through into DONE_LABEL ...
3542 3466 } else {
3543 3467 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
3544 3468 masm.movptr(boxReg, tmpReg) ;
3545 3469
3546 3470 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3547 3471 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3548 3472 // prefetchw [eax + Offset(_owner)-2]
3549 3473 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3550 3474 }
3551 3475
3552 3476 if ((EmitSync & 64) == 0) {
3553 3477 // Optimistic form
3554 3478 masm.xorptr (tmpReg, tmpReg) ;
3555 3479 } else {
3556 3480 // Can suffer RTS->RTO upgrades on shared or cold $ lines
3557 3481 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
3558 3482 masm.testptr(tmpReg, tmpReg) ; // Locked ?
3559 3483 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3560 3484 }
3561 3485
3562 3486 // Appears unlocked - try to swing _owner from null to non-null.
3563 3487 // Use either "Self" (in scr) or rsp as thread identity in _owner.
3564 3488 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
3565 3489 masm.get_thread (scrReg) ;
3566 3490 if (os::is_MP()) { masm.lock(); }
3567 3491 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3568 3492
3569 3493 // If the CAS fails we can either retry or pass control to the slow-path.
3570 3494 // We use the latter tactic.
3571 3495 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3572 3496 // If the CAS was successful ...
3573 3497 // Self has acquired the lock
3574 3498 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3575 3499 // Intentional fall-through into DONE_LABEL ...
3576 3500 }
3577 3501
3578 3502 // DONE_LABEL is a hot target - we'd really like to place it at the
3579 3503 // start of cache line by padding with NOPs.
3580 3504 // See the AMD and Intel software optimization manuals for the
3581 3505 // most efficient "long" NOP encodings.
3582 3506 // Unfortunately none of our alignment mechanisms suffice.
3583 3507 masm.bind(DONE_LABEL);
3584 3508
3585 3509 // Avoid branch-to-branch on AMD processors
3586 3510 // This appears to be superstition.
3587 3511 if (EmitSync & 32) masm.nop() ;
3588 3512
3589 3513
3590 3514 // At DONE_LABEL the icc ZFlag is set as follows ...
3591 3515 // Fast_Unlock uses the same protocol.
3592 3516 // ZFlag == 1 -> Success
3593 3517 // ZFlag == 0 -> Failure - force control through the slow-path
3594 3518 }
3595 3519 %}
3596 3520
3597 3521 // obj: object to unlock
3598 3522 // box: box address (displaced header location), killed. Must be EAX.
3599 3523 // rbx,: killed tmp; cannot be obj nor box.
3600 3524 //
3601 3525 // Some commentary on balanced locking:
3602 3526 //
3603 3527 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3604 3528 // Methods that don't have provably balanced locking are forced to run in the
3605 3529 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3606 3530 // The interpreter provides two properties:
3607 3531 // I1: At return-time the interpreter automatically and quietly unlocks any
3608 3532 // objects acquired the current activation (frame). Recall that the
3609 3533 // interpreter maintains an on-stack list of locks currently held by
3610 3534 // a frame.
3611 3535 // I2: If a method attempts to unlock an object that is not held by
3612 3536 // the frame the interpreter throws IMSX.
3613 3537 //
3614 3538 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3615 3539 // B() doesn't have provably balanced locking so it runs in the interpreter.
3616 3540 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3617 3541 // is still locked by A().
3618 3542 //
3619 3543 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3620 3544 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3621 3545 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3622 3546 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3623 3547
3624 3548 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3625 3549
3626 3550 Register objReg = as_Register($obj$$reg);
3627 3551 Register boxReg = as_Register($box$$reg);
3628 3552 Register tmpReg = as_Register($tmp$$reg);
3629 3553
3630 3554 guarantee (objReg != boxReg, "") ;
3631 3555 guarantee (objReg != tmpReg, "") ;
3632 3556 guarantee (boxReg != tmpReg, "") ;
3633 3557 guarantee (boxReg == as_Register(EAX_enc), "") ;
3634 3558 MacroAssembler masm(&cbuf);
3635 3559
3636 3560 if (EmitSync & 4) {
3637 3561 // Disable - inhibit all inlining. Force control through the slow-path
3638 3562 masm.cmpptr (rsp, 0) ;
3639 3563 } else
3640 3564 if (EmitSync & 8) {
3641 3565 Label DONE_LABEL ;
3642 3566 if (UseBiasedLocking) {
3643 3567 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3644 3568 }
3645 3569 // classic stack-locking code ...
3646 3570 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3647 3571 masm.testptr(tmpReg, tmpReg) ;
3648 3572 masm.jcc (Assembler::zero, DONE_LABEL) ;
3649 3573 if (os::is_MP()) { masm.lock(); }
3650 3574 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3651 3575 masm.bind(DONE_LABEL);
3652 3576 } else {
3653 3577 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3654 3578
3655 3579 // Critically, the biased locking test must have precedence over
3656 3580 // and appear before the (box->dhw == 0) recursive stack-lock test.
3657 3581 if (UseBiasedLocking && !UseOptoBiasInlining) {
3658 3582 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3659 3583 }
3660 3584
3661 3585 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3662 3586 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3663 3587 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3664 3588
3665 3589 masm.testptr(tmpReg, 0x02) ; // Inflated?
3666 3590 masm.jccb (Assembler::zero, Stacked) ;
3667 3591
3668 3592 masm.bind (Inflated) ;
3669 3593 // It's inflated.
3670 3594 // Despite our balanced locking property we still check that m->_owner == Self
3671 3595 // as java routines or native JNI code called by this thread might
3672 3596 // have released the lock.
3673 3597 // Refer to the comments in synchronizer.cpp for how we might encode extra
3674 3598 // state in _succ so we can avoid fetching EntryList|cxq.
3675 3599 //
3676 3600 // I'd like to add more cases in fast_lock() and fast_unlock() --
3677 3601 // such as recursive enter and exit -- but we have to be wary of
3678 3602 // I$ bloat, T$ effects and BP$ effects.
3679 3603 //
3680 3604 // If there's no contention try a 1-0 exit. That is, exit without
3681 3605 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3682 3606 // we detect and recover from the race that the 1-0 exit admits.
3683 3607 //
3684 3608 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3685 3609 // before it STs null into _owner, releasing the lock. Updates
3686 3610 // to data protected by the critical section must be visible before
3687 3611 // we drop the lock (and thus before any other thread could acquire
3688 3612 // the lock and observe the fields protected by the lock).
3689 3613 // IA32's memory-model is SPO, so STs are ordered with respect to
3690 3614 // each other and there's no need for an explicit barrier (fence).
3691 3615 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3692 3616
3693 3617 masm.get_thread (boxReg) ;
3694 3618 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3695 3619 // prefetchw [ebx + Offset(_owner)-2]
3696 3620 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3697 3621 }
3698 3622
3699 3623 // Note that we could employ various encoding schemes to reduce
3700 3624 // the number of loads below (currently 4) to just 2 or 3.
3701 3625 // Refer to the comments in synchronizer.cpp.
3702 3626 // In practice the chain of fetches doesn't seem to impact performance, however.
3703 3627 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3704 3628 // Attempt to reduce branch density - AMD's branch predictor.
3705 3629 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3706 3630 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3707 3631 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3708 3632 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3709 3633 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3710 3634 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3711 3635 masm.jmpb (DONE_LABEL) ;
3712 3636 } else {
3713 3637 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3714 3638 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3715 3639 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3716 3640 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3717 3641 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3718 3642 masm.jccb (Assembler::notZero, CheckSucc) ;
3719 3643 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3720 3644 masm.jmpb (DONE_LABEL) ;
3721 3645 }
3722 3646
3723 3647 // The Following code fragment (EmitSync & 65536) improves the performance of
3724 3648 // contended applications and contended synchronization microbenchmarks.
3725 3649 // Unfortunately the emission of the code - even though not executed - causes regressions
3726 3650 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3727 3651 // with an equal number of never-executed NOPs results in the same regression.
3728 3652 // We leave it off by default.
3729 3653
3730 3654 if ((EmitSync & 65536) != 0) {
3731 3655 Label LSuccess, LGoSlowPath ;
3732 3656
3733 3657 masm.bind (CheckSucc) ;
3734 3658
3735 3659 // Optional pre-test ... it's safe to elide this
3736 3660 if ((EmitSync & 16) == 0) {
3737 3661 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3738 3662 masm.jccb (Assembler::zero, LGoSlowPath) ;
3739 3663 }
3740 3664
3741 3665 // We have a classic Dekker-style idiom:
3742 3666 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3743 3667 // There are a number of ways to implement the barrier:
3744 3668 // (1) lock:andl &m->_owner, 0
3745 3669 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3746 3670 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3747 3671 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3748 3672 // (2) If supported, an explicit MFENCE is appealing.
3749 3673 // In older IA32 processors MFENCE is slower than lock:add or xchg
3750 3674 // particularly if the write-buffer is full as might be the case if
3751 3675 // if stores closely precede the fence or fence-equivalent instruction.
3752 3676 // In more modern implementations MFENCE appears faster, however.
3753 3677 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3754 3678 // The $lines underlying the top-of-stack should be in M-state.
3755 3679 // The locked add instruction is serializing, of course.
3756 3680 // (4) Use xchg, which is serializing
3757 3681 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3758 3682 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3759 3683 // The integer condition codes will tell us if succ was 0.
3760 3684 // Since _succ and _owner should reside in the same $line and
3761 3685 // we just stored into _owner, it's likely that the $line
3762 3686 // remains in M-state for the lock:orl.
3763 3687 //
3764 3688 // We currently use (3), although it's likely that switching to (2)
3765 3689 // is correct for the future.
3766 3690
3767 3691 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3768 3692 if (os::is_MP()) {
3769 3693 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3770 3694 masm.mfence();
3771 3695 } else {
3772 3696 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3773 3697 }
3774 3698 }
3775 3699 // Ratify _succ remains non-null
3776 3700 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3777 3701 masm.jccb (Assembler::notZero, LSuccess) ;
3778 3702
3779 3703 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3780 3704 if (os::is_MP()) { masm.lock(); }
3781 3705 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3782 3706 masm.jccb (Assembler::notEqual, LSuccess) ;
3783 3707 // Since we're low on registers we installed rsp as a placeholding in _owner.
3784 3708 // Now install Self over rsp. This is safe as we're transitioning from
3785 3709 // non-null to non=null
3786 3710 masm.get_thread (boxReg) ;
3787 3711 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3788 3712 // Intentional fall-through into LGoSlowPath ...
3789 3713
3790 3714 masm.bind (LGoSlowPath) ;
3791 3715 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3792 3716 masm.jmpb (DONE_LABEL) ;
3793 3717
3794 3718 masm.bind (LSuccess) ;
3795 3719 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3796 3720 masm.jmpb (DONE_LABEL) ;
3797 3721 }
3798 3722
3799 3723 masm.bind (Stacked) ;
3800 3724 // It's not inflated and it's not recursively stack-locked and it's not biased.
3801 3725 // It must be stack-locked.
3802 3726 // Try to reset the header to displaced header.
3803 3727 // The "box" value on the stack is stable, so we can reload
3804 3728 // and be assured we observe the same value as above.
3805 3729 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3806 3730 if (os::is_MP()) { masm.lock(); }
3807 3731 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3808 3732 // Intention fall-thru into DONE_LABEL
3809 3733
3810 3734
3811 3735 // DONE_LABEL is a hot target - we'd really like to place it at the
3812 3736 // start of cache line by padding with NOPs.
3813 3737 // See the AMD and Intel software optimization manuals for the
3814 3738 // most efficient "long" NOP encodings.
3815 3739 // Unfortunately none of our alignment mechanisms suffice.
3816 3740 if ((EmitSync & 65536) == 0) {
3817 3741 masm.bind (CheckSucc) ;
3818 3742 }
3819 3743 masm.bind(DONE_LABEL);
3820 3744
3821 3745 // Avoid branch to branch on AMD processors
3822 3746 if (EmitSync & 32768) { masm.nop() ; }
3823 3747 }
3824 3748 %}
3825 3749
3826 3750
3827 3751 enc_class enc_pop_rdx() %{
  // Emit a single-byte POP EDX (0x5A); used to discard or reload the rdx slot.
3828 3752 emit_opcode(cbuf,0x5A);
3829 3753 %}
3830 3754
3831 3755 enc_class enc_rethrow() %{
  // Tail-jump to the shared rethrow stub. The displacement is PC-relative,
  // hence the "-4" to account for the 4-byte disp field of the JMP itself.
3832 3756 cbuf.set_insts_mark();
3833 3757 emit_opcode(cbuf, 0xE9); // jmp entry
3834 3758 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3835 3759 runtime_call_Relocation::spec(), RELOC_IMM32 );
3836 3760 %}
3837 3761
3838 3762
3839 3763 // Convert a double to an int. Java semantics require we do complex
3840 3764 // manipulations in the corner cases. So we set the rounding mode to
3841 3765 // 'zero', store the darned double down as an int, and reset the
3842 3766 // rounding mode to 'nearest'. The hardware throws an exception which
3843 3767 // patches up the correct value directly to the stack.
3844 3768 enc_class D2I_encoding( regD src ) %{
3845 3769 // Flip to round-to-zero mode. We attempted to allow invalid-op
3846 3770 // exceptions here, so that a NAN or other corner-case value will
3847 3771 // throw an exception (but normal values get converted at full speed).
3848 3772 // However, I2C adapters and other float-stack manglers leave pending
3849 3773 // invalid-op exceptions hanging. We would have to clear them before
3850 3774 // enabling them and that is more expensive than just testing for the
3851 3775 // invalid value Intel stores down in the corner cases.
3852 3776 emit_opcode(cbuf,0xD9); // FLDCW trunc
3853 3777 emit_opcode(cbuf,0x2D);
3854 3778 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3855 3779 // Allocate a word
3856 3780 emit_opcode(cbuf,0x83); // SUB ESP,4
3857 3781 emit_opcode(cbuf,0xEC);
3858 3782 emit_d8(cbuf,0x04);
3859 3783 // Encoding assumes a double has been pushed into FPR0.
3860 3784 // Store down the double as an int, popping the FPU stack
3861 3785 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3862 3786 emit_opcode(cbuf,0x1C);
3863 3787 emit_d8(cbuf,0x24);
3864 3788 // Restore the rounding mode; mask the exception
3865 3789 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3866 3790 emit_opcode(cbuf,0x2D);
3867 3791 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3868 3792 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3869 3793 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3870 3794
3871 3795 // Load the converted int; adjust CPU stack
3872 3796 emit_opcode(cbuf,0x58); // POP EAX
  // 0x80000000 is the x87 "integer indefinite" value stored for NaN and
  // out-of-range inputs; only then do we take the slow path to get Java
  // semantics (d2i_wrapper).
3873 3797 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3874 3798 emit_d32 (cbuf,0x80000000); // 0x80000000
3875 3799 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x07 = byte length of the slow-path sequence below (FLD = 2, CALL = 5);
  // keep it in sync if the slow path changes.
3876 3800 emit_d8 (cbuf,0x07); // Size of slow_call
3877 3801 // Push src onto stack slow-path
3878 3802 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3879 3803 emit_d8 (cbuf,0xC0-1+$src$$reg );
3880 3804 // CALL directly to the runtime
3881 3805 cbuf.set_insts_mark();
3882 3806 emit_opcode(cbuf,0xE8); // Call into runtime
3883 3807 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3884 3808 // Carry on here...
3885 3809 %}
3886 3810
  // Convert a double (in FPR0) to a long with Java semantics: truncating
  // FISTP to a 64-bit slot, then a slow-path call to d2l_wrapper when the
  // result is the x87 "integer indefinite" 0x8000000000000000 (NaN/overflow).
3887 3811 enc_class D2L_encoding( regD src ) %{
3888 3812 emit_opcode(cbuf,0xD9); // FLDCW trunc
3889 3813 emit_opcode(cbuf,0x2D);
3890 3814 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3891 3815 // Allocate a word
3892 3816 emit_opcode(cbuf,0x83); // SUB ESP,8
3893 3817 emit_opcode(cbuf,0xEC);
3894 3818 emit_d8(cbuf,0x08);
3895 3819 // Encoding assumes a double has been pushed into FPR0.
3896 3820 // Store down the double as a long, popping the FPU stack
3897 3821 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3898 3822 emit_opcode(cbuf,0x3C);
3899 3823 emit_d8(cbuf,0x24);
3900 3824 // Restore the rounding mode; mask the exception
3901 3825 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3902 3826 emit_opcode(cbuf,0x2D);
3903 3827 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3904 3828 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3905 3829 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3906 3830
3907 3831 // Load the converted int; adjust CPU stack
3908 3832 emit_opcode(cbuf,0x58); // POP EAX
3909 3833 emit_opcode(cbuf,0x5A); // POP EDX
  // Slow path only when EDX:EAX == 0x80000000:00000000 (hi == 0x80000000
  // AND lo == 0), i.e. the x87 indefinite value.
3910 3834 emit_opcode(cbuf,0x81); // CMP EDX,imm
3911 3835 emit_d8 (cbuf,0xFA); // rdx
3912 3836 emit_d32 (cbuf,0x80000000); // 0x80000000
3913 3837 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x07+4 = slow path (7) plus the TEST/JNE pair (4) still ahead.
3914 3838 emit_d8 (cbuf,0x07+4); // Size of slow_call
3915 3839 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3916 3840 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3917 3841 emit_opcode(cbuf,0x75); // JNE around_slow_call
3918 3842 emit_d8 (cbuf,0x07); // Size of slow_call
3919 3843 // Push src onto stack slow-path
3920 3844 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3921 3845 emit_d8 (cbuf,0xC0-1+$src$$reg );
3922 3846 // CALL directly to the runtime
3923 3847 cbuf.set_insts_mark();
3924 3848 emit_opcode(cbuf,0xE8); // Call into runtime
3925 3849 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3926 3850 // Carry on here...
3927 3851 %}
3928 3852
  // Convert a float in an XMM register to a long. The value is bounced
  // through memory onto the x87 stack (FLD_S) because SSE has no
  // float->int64 conversion in 32-bit mode; then same truncate/FISTP/
  // slow-path pattern as D2L_encoding.
3929 3853 enc_class X2L_encoding( regX src ) %{
3930 3854 // Allocate a word
3931 3855 emit_opcode(cbuf,0x83); // SUB ESP,8
3932 3856 emit_opcode(cbuf,0xEC);
3933 3857 emit_d8(cbuf,0x08);
3934 3858
3935 3859 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3936 3860 emit_opcode (cbuf, 0x0F );
3937 3861 emit_opcode (cbuf, 0x11 );
3938 3862 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3863
3940 3864 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3941 3865 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3866
3943 3867 emit_opcode(cbuf,0xD9); // FLDCW trunc
3944 3868 emit_opcode(cbuf,0x2D);
3945 3869 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3946 3870
3947 3871 // Encoding assumes a double has been pushed into FPR0.
3948 3872 // Store down the double as a long, popping the FPU stack
3949 3873 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3950 3874 emit_opcode(cbuf,0x3C);
3951 3875 emit_d8(cbuf,0x24);
3952 3876
3953 3877 // Restore the rounding mode; mask the exception
3954 3878 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3955 3879 emit_opcode(cbuf,0x2D);
3956 3880 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3957 3881 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3958 3882 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3959 3883
3960 3884 // Load the converted int; adjust CPU stack
3961 3885 emit_opcode(cbuf,0x58); // POP EAX
3962 3886
3963 3887 emit_opcode(cbuf,0x5A); // POP EDX
3964 3888
  // Slow path only when EDX:EAX == 0x80000000:00000000 (x87 indefinite).
3965 3889 emit_opcode(cbuf,0x81); // CMP EDX,imm
3966 3890 emit_d8 (cbuf,0xFA); // rdx
3967 3891 emit_d32 (cbuf,0x80000000);// 0x80000000
3968 3892
3969 3893 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x13 (19) = byte length of the slow path below; +4 skips the
  // TEST/JNE pair as well. Keep in sync with the emitted bytes.
3970 3894 emit_d8 (cbuf,0x13+4); // Size of slow_call
3971 3895
3972 3896 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3973 3897 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3974 3898
3975 3899 emit_opcode(cbuf,0x75); // JNE around_slow_call
3976 3900 emit_d8 (cbuf,0x13); // Size of slow_call
3977 3901
3978 3902 // Allocate a word
3979 3903 emit_opcode(cbuf,0x83); // SUB ESP,4
3980 3904 emit_opcode(cbuf,0xEC);
3981 3905 emit_d8(cbuf,0x04);
3982 3906
3983 3907 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3984 3908 emit_opcode (cbuf, 0x0F );
3985 3909 emit_opcode (cbuf, 0x11 );
3986 3910 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3987 3911
  // Reload src onto the x87 stack for the runtime wrapper's calling
  // convention, then free the scratch word.
3988 3912 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3989 3913 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3990 3914
3991 3915 emit_opcode(cbuf,0x83); // ADD ESP,4
3992 3916 emit_opcode(cbuf,0xC4);
3993 3917 emit_d8(cbuf,0x04);
3994 3918
3995 3919 // CALL directly to the runtime
3996 3920 cbuf.set_insts_mark();
3997 3921 emit_opcode(cbuf,0xE8); // Call into runtime
3998 3922 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3999 3923 // Carry on here...
4000 3924 %}
4001 3925
  // Convert a double in an XMM register to a long. Same scheme as
  // X2L_encoding but with MOVSD/FLD_D and an 8-byte scratch slot.
4002 3926 enc_class XD2L_encoding( regXD src ) %{
4003 3927 // Allocate a word
4004 3928 emit_opcode(cbuf,0x83); // SUB ESP,8
4005 3929 emit_opcode(cbuf,0xEC);
4006 3930 emit_d8(cbuf,0x08);
4007 3931
4008 3932 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4009 3933 emit_opcode (cbuf, 0x0F );
4010 3934 emit_opcode (cbuf, 0x11 );
4011 3935 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4012 3936
4013 3937 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4014 3938 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4015 3939
4016 3940 emit_opcode(cbuf,0xD9); // FLDCW trunc
4017 3941 emit_opcode(cbuf,0x2D);
4018 3942 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4019 3943
4020 3944 // Encoding assumes a double has been pushed into FPR0.
4021 3945 // Store down the double as a long, popping the FPU stack
4022 3946 emit_opcode(cbuf,0xDF); // FISTP [ESP]
4023 3947 emit_opcode(cbuf,0x3C);
4024 3948 emit_d8(cbuf,0x24);
4025 3949
4026 3950 // Restore the rounding mode; mask the exception
4027 3951 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
4028 3952 emit_opcode(cbuf,0x2D);
4029 3953 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4030 3954 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4031 3955 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4032 3956
4033 3957 // Load the converted int; adjust CPU stack
4034 3958 emit_opcode(cbuf,0x58); // POP EAX
4035 3959
4036 3960 emit_opcode(cbuf,0x5A); // POP EDX
4037 3961
  // Slow path only when EDX:EAX == 0x80000000:00000000 (x87 indefinite).
4038 3962 emit_opcode(cbuf,0x81); // CMP EDX,imm
4039 3963 emit_d8 (cbuf,0xFA); // rdx
4040 3964 emit_d32 (cbuf,0x80000000); // 0x80000000
4041 3965
4042 3966 emit_opcode(cbuf,0x75); // JNE around_slow_call
  // 0x13 (19) = byte length of the slow path below; +4 also skips the
  // TEST/JNE pair. Keep in sync with the emitted bytes.
4043 3967 emit_d8 (cbuf,0x13+4); // Size of slow_call
4044 3968
4045 3969 emit_opcode(cbuf,0x85); // TEST EAX,EAX
4046 3970 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
4047 3971
4048 3972 emit_opcode(cbuf,0x75); // JNE around_slow_call
4049 3973 emit_d8 (cbuf,0x13); // Size of slow_call
4050 3974
4051 3975 // Push src onto stack slow-path
4052 3976 // Allocate a word
4053 3977 emit_opcode(cbuf,0x83); // SUB ESP,8
4054 3978 emit_opcode(cbuf,0xEC);
4055 3979 emit_d8(cbuf,0x08);
4056 3980
4057 3981 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4058 3982 emit_opcode (cbuf, 0x0F );
4059 3983 emit_opcode (cbuf, 0x11 );
4060 3984 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4061 3985
4062 3986 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4063 3987 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4064 3988
4065 3989 emit_opcode(cbuf,0x83); // ADD ESP,8
4066 3990 emit_opcode(cbuf,0xC4);
4067 3991 emit_d8(cbuf,0x08);
4068 3992
4069 3993 // CALL directly to the runtime
4070 3994 cbuf.set_insts_mark();
4071 3995 emit_opcode(cbuf,0xE8); // Call into runtime
4072 3996 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4073 3997 // Carry on here...
4074 3998 %}
4075 3999
  // Move a float from the x87 stack (regD src) into an XMM register (regX
  // dst) via a 4-byte stack scratch slot: FST(P)_S [ESP]; MOVSS dst,[ESP].
  // If src is already FPR1 (top-of-stack) it is popped directly (FSTP);
  // otherwise it is first duplicated to the top with FLD ST(i).
4076 4000 enc_class D2X_encoding( regX dst, regD src ) %{
4077 4001 // Allocate a word
4078 4002 emit_opcode(cbuf,0x83); // SUB ESP,4
4079 4003 emit_opcode(cbuf,0xEC);
4080 4004 emit_d8(cbuf,0x04);
  // pop = /2 (FST) vs /3 (FSTP): pop only the copy we pushed ourselves.
4081 4005 int pop = 0x02;
4082 4006 if ($src$$reg != FPR1L_enc) {
4083 4007 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4084 4008 emit_d8( cbuf, 0xC0-1+$src$$reg );
4085 4009 pop = 0x03;
4086 4010 }
4087 4011 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4088 4012
4089 4013 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4090 4014 emit_opcode (cbuf, 0x0F );
4091 4015 emit_opcode (cbuf, 0x10 );
4092 4016 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4093 4017
4094 4018 emit_opcode(cbuf,0x83); // ADD ESP,4
4095 4019 emit_opcode(cbuf,0xC4);
4096 4020 emit_d8(cbuf,0x04);
4097 4021 // Carry on here...
4098 4022 %}
4099 4023
  // Finish an XMM float/double -> int conversion: the truncating CVTT
  // opcode bytes are emitted by the instruction ($primary selects the
  // double variant); this encoding emits the ModRM plus the 0x80000000
  // sentinel check and the d2i_wrapper slow path for NaN/overflow.
4100 4024 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4101 4025 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4102 4026
4103 4027 // Compare the result to see if we need to go to the slow path
4104 4028 emit_opcode(cbuf,0x81); // CMP dst,imm
4105 4029 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4106 4030 emit_d32 (cbuf,0x80000000); // 0x80000000
4107 4031
4108 4032 emit_opcode(cbuf,0x75); // JNE around_slow_call
4109 4033 emit_d8 (cbuf,0x13); // Size of slow_call
4110 4034 // Store xmm to a temp memory
4111 4035 // location and push it onto stack.
4112 4036
4113 4037 emit_opcode(cbuf,0x83); // SUB ESP,4 (8 when $primary, i.e. double)
4114 4038 emit_opcode(cbuf,0xEC);
4115 4039 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4116 4040
4117 4041 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSD/MOVSS [ESP], xmm
4118 4042 emit_opcode (cbuf, 0x0F );
4119 4043 emit_opcode (cbuf, 0x11 );
4120 4044 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4121 4045
4122 4046 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD_D/FLD_S [ESP]
4123 4047 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4124 4048
4125 4049 emit_opcode(cbuf,0x83); // ADD ESP,4 (8 when $primary, i.e. double)
4126 4050 emit_opcode(cbuf,0xC4);
4127 4051 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4128 4052
4129 4053 // CALL directly to the runtime
4130 4054 cbuf.set_insts_mark();
4131 4055 emit_opcode(cbuf,0xE8); // Call into runtime
4132 4056 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4133 4057
4134 4058 // Carry on here...
4135 4059 %}
4136 4060
  // Move a float from an XMM register onto the x87 stack via a 4-byte
  // stack scratch slot: MOVSS [ESP],src; FLD_S [ESP]. The regD dst is
  // implicitly the new top-of-stack.
4137 4061 enc_class X2D_encoding( regD dst, regX src ) %{
4138 4062 // Allocate a word
4139 4063 emit_opcode(cbuf,0x83); // SUB ESP,4
4140 4064 emit_opcode(cbuf,0xEC);
4141 4065 emit_d8(cbuf,0x04);
4142 4066
4143 4067 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4144 4068 emit_opcode (cbuf, 0x0F );
4145 4069 emit_opcode (cbuf, 0x11 );
4146 4070 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4147 4071
4148 4072 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4149 4073 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4150 4074
4151 4075 emit_opcode(cbuf,0x83); // ADD ESP,4
4152 4076 emit_opcode(cbuf,0xC4);
4153 4077 emit_d8(cbuf,0x04);
4154 4078
4155 4079 // Carry on here...
4156 4080 %}
4157 4081
  // Absolute value of a float in XMM: clear the sign bit by ANDing with
  // the float_signmask_pool constant (absolute address, disp32 operand).
4158 4082 enc_class AbsXF_encoding(regX dst) %{
4159 4083 address signmask_address=(address)float_signmask_pool;
4160 4084 // ANDPS $dst,[signconst]
4161 4085 emit_opcode(cbuf, 0x0F);
4162 4086 emit_opcode(cbuf, 0x54);
4163 4087 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4164 4088 emit_d32(cbuf, (int)signmask_address);
4165 4089 %}
4166 4090
  // Absolute value of a double in XMM: clear the sign bit by ANDing with
  // the double_signmask_pool constant (absolute address, disp32 operand).
4167 4091 enc_class AbsXD_encoding(regXD dst) %{
4168 4092 address signmask_address=(address)double_signmask_pool;
4169 4093 // ANDPD $dst,[signconst]
4170 4094 emit_opcode(cbuf, 0x66);
4171 4095 emit_opcode(cbuf, 0x0F);
4172 4096 emit_opcode(cbuf, 0x54);
4173 4097 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4174 4098 emit_d32(cbuf, (int)signmask_address);
4175 4099 %}
4176 4100
  // Negate a float in XMM: flip the sign bit by XORing with the
  // float_signflip_pool constant (absolute address, disp32 operand).
4177 4101 enc_class NegXF_encoding(regX dst) %{
4178 4102 address signmask_address=(address)float_signflip_pool;
4179 4103 // XORPS $dst,[signconst]
4180 4104 emit_opcode(cbuf, 0x0F);
4181 4105 emit_opcode(cbuf, 0x57);
4182 4106 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4183 4107 emit_d32(cbuf, (int)signmask_address);
4184 4108 %}
4185 4109
  // Negate a double in XMM: flip the sign bit by XORing with the
  // double_signflip_pool constant (absolute address, disp32 operand).
4186 4110 enc_class NegXD_encoding(regXD dst) %{
4187 4111 address signmask_address=(address)double_signflip_pool;
4188 4112 // XORPD $dst,[signconst]
4189 4113 emit_opcode(cbuf, 0x66);
4190 4114 emit_opcode(cbuf, 0x0F);
4191 4115 emit_opcode(cbuf, 0x57);
4192 4116 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4193 4117 emit_d32(cbuf, (int)signmask_address);
4194 4118 %}
4195 4119
  // Multiply top-of-stack by x87 register $src1 (result stays on top).
4196 4120 enc_class FMul_ST_reg( eRegF src1 ) %{
4197 4121 // Operand was loaded from memory into fp ST (stack top)
4198 4122 // FMUL ST,$src /* D8 C8+i */
4199 4123 emit_opcode(cbuf, 0xD8);
4200 4124 emit_opcode(cbuf, 0xC8 + $src1$$reg);
4201 4125 %}
4202 4126
  // Add x87 register $src2 to top-of-stack (non-popping form).
4203 4127 enc_class FAdd_ST_reg( eRegF src2 ) %{
4204 4128 // FADD ST,$src2 /* D8 C0+i */ (note: not FADDP, which is DE C0+i)
4205 4129 emit_opcode(cbuf, 0xD8);
4206 4130 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4207 4131 //could use FADDP src2,fpST /* DE C0+i */
4208 4132 %}
4209 4133
  // Add top-of-stack into x87 register $src2 and pop the stack.
4210 4134 enc_class FAddP_reg_ST( eRegF src2 ) %{
4211 4135 // FADDP src2,ST /* DE C0+i */
4212 4136 emit_opcode(cbuf, 0xDE);
4213 4137 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4214 4138 %}
4215 4139
  // Fused subtract-then-divide on the x87 stack:
  // ST = (ST - $src1) / $src2, result left on top.
4216 4140 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4217 4141 // Operand has been loaded into fp ST (stack top)
4218 4142 // FSUB ST,$src1 /* D8 E0+i */
4219 4143 emit_opcode(cbuf, 0xD8);
4220 4144 emit_opcode(cbuf, 0xE0 + $src1$$reg);
4221 4145
4222 4146 // FDIV ST,$src2 /* D8 F0+i */
4223 4147 emit_opcode(cbuf, 0xD8);
4224 4148 emit_opcode(cbuf, 0xF0 + $src2$$reg);
4225 4149 %}
4226 4150
  // Fused add-then-multiply on the x87 stack:
  // ST = (ST + $src1) * $src2, result left on top.
4227 4151 enc_class MulFAddF (eRegF src1, eRegF src2) %{
4228 4152 // Operand was loaded from memory into fp ST (stack top)
4229 4153 // FADD ST,$src /* D8 C0+i */
4230 4154 emit_opcode(cbuf, 0xD8);
4231 4155 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4232 4156
4233 4157 // FMUL ST,src2 /* D8 C8+i */
4234 4158 emit_opcode(cbuf, 0xD8);
4235 4159 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4236 4160 %}
4237 4161
4238 4162
4239 4163 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4240 4164 // Operand was loaded from memory into fp ST (stack top)
4241 4165 // FADD ST,$src /* D8 C0+i */
4242 4166 emit_opcode(cbuf, 0xD8);
4243 4167 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4244 4168
4245 4169 // FMULP src2,ST /* DE C8+i */
4246 4170 emit_opcode(cbuf, 0xDE);
4247 4171 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4248 4172 %}
4249 4173
4250 4174 // Atomically load the volatile long
  // Uses the x87 FPU for a single 64-bit memory access: FILD qword [mem]
  // then FISTP qword to the destination stack slot. FILD/FISTP do not
  // round, so the bit pattern is preserved exactly.
4251 4175 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4252 4176 emit_opcode(cbuf,0xDF); // FILD qword [mem] (DF /5)
4253 4177 int rm_byte_opcode = 0x05;
4254 4178 int base = $mem$$base;
4255 4179 int index = $mem$$index;
4256 4180 int scale = $mem$$scale;
4257 4181 int displace = $mem$$disp;
4258 4182 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4259 4183 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4260 4184 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); // FISTP qword [dst] (DF /7)
4261 4185 %}
4262 4186
  // Atomically load a volatile long using SSE2: one 64-bit XMM load from
  // $mem, one 64-bit XMM store to the destination stack slot.
4263 4187 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4264 4188 { // Atomic long load
4265 4189 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4266 4190 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4267 4191 emit_opcode(cbuf,0x0F);
4268 4192 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4269 4193 int base = $mem$$base;
4270 4194 int index = $mem$$index;
4271 4195 int scale = $mem$$scale;
4272 4196 int displace = $mem$$disp;
4273 4197 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4198 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4199 }
4276 4200 { // MOVSD $dst,$tmp ! atomic long store
4277 4201 emit_opcode(cbuf,0xF2);
4278 4202 emit_opcode(cbuf,0x0F);
4279 4203 emit_opcode(cbuf,0x11);
4280 4204 int base = $dst$$base;
4281 4205 int index = $dst$$index;
4282 4206 int scale = $dst$$scale;
4283 4207 int displace = $dst$$disp;
4284 4208 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4285 4209 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4286 4210 }
4287 4211 %}
4288 4212
  // Atomically load a volatile long into a GPR pair using SSE2: one
  // 64-bit XMM load, then MOVD the low word, shift the XMM right by 32,
  // and MOVD the high word into the paired register.
4289 4213 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4290 4214 { // Atomic long load
4291 4215 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4292 4216 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4293 4217 emit_opcode(cbuf,0x0F);
4294 4218 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4295 4219 int base = $mem$$base;
4296 4220 int index = $mem$$index;
4297 4221 int scale = $mem$$scale;
4298 4222 int displace = $mem$$disp;
4299 4223 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4300 4224 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4301 4225 }
4302 4226 { // MOVD $dst.lo,$tmp
4303 4227 emit_opcode(cbuf,0x66);
4304 4228 emit_opcode(cbuf,0x0F);
4305 4229 emit_opcode(cbuf,0x7E);
4306 4230 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4307 4231 }
4308 4232 { // PSRLQ $tmp,32
4309 4233 emit_opcode(cbuf,0x66);
4310 4234 emit_opcode(cbuf,0x0F);
4311 4235 emit_opcode(cbuf,0x73);
4312 4236 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4313 4237 emit_d8(cbuf, 0x20);
4314 4238 }
4315 4239 { // MOVD $dst.hi,$tmp
4316 4240 emit_opcode(cbuf,0x66);
4317 4241 emit_opcode(cbuf,0x0F);
4318 4242 emit_opcode(cbuf,0x7E);
4319 4243 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4320 4244 }
4321 4245 %}
4322 4246
4323 4247 // Volatile Store Long. Must be atomic, so move it into
4324 4248 // the FP TOS and then do a 64-bit FIST. Has to probe the
4325 4249 // target address before the store (for null-ptr checks)
4326 4250 // so the memory operand is used twice in the encoding.
4327 4251 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  // FILD qword from the source stack slot (DF /5), then FISTP qword to
  // $mem (DF /7) — a single atomic 64-bit store via the FPU.
4328 4252 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
4329 4253 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
4330 4254 emit_opcode(cbuf,0xDF);
4331 4255 int rm_byte_opcode = 0x07;
4332 4256 int base = $mem$$base;
4333 4257 int index = $mem$$index;
4334 4258 int scale = $mem$$scale;
4335 4259 int displace = $mem$$disp;
4336 4260 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4337 4261 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4338 4262 %}
4339 4263
  // Atomically store a volatile long using SSE2: one 64-bit XMM load from
  // the source stack slot, one 64-bit XMM (MOVSD) store to $mem.
4340 4264 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4341 4265 { // Atomic long load
4342 4266 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4343 4267 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4344 4268 emit_opcode(cbuf,0x0F);
4345 4269 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4346 4270 int base = $src$$base;
4347 4271 int index = $src$$index;
4348 4272 int scale = $src$$scale;
4349 4273 int displace = $src$$disp;
4350 4274 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4351 4275 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4352 4276 }
4353 4277 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4354 4278 { // MOVSD $mem,$tmp ! atomic long store
4355 4279 emit_opcode(cbuf,0xF2);
4356 4280 emit_opcode(cbuf,0x0F);
4357 4281 emit_opcode(cbuf,0x11);
4358 4282 int base = $mem$$base;
4359 4283 int index = $mem$$index;
4360 4284 int scale = $mem$$scale;
4361 4285 int displace = $mem$$disp;
4362 4286 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4363 4287 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4364 4288 }
4365 4289 %}
4366 4290
  // Atomically store a volatile long from a GPR pair using SSE2: MOVD the
  // low and high halves into two XMM temps, merge them with PUNPCKLDQ,
  // then do a single 64-bit MOVSD store to $mem.
4367 4291 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4368 4292 { // MOVD $tmp,$src.lo
4369 4293 emit_opcode(cbuf,0x66);
4370 4294 emit_opcode(cbuf,0x0F);
4371 4295 emit_opcode(cbuf,0x6E);
4372 4296 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4373 4297 }
4374 4298 { // MOVD $tmp2,$src.hi
4375 4299 emit_opcode(cbuf,0x66);
4376 4300 emit_opcode(cbuf,0x0F);
4377 4301 emit_opcode(cbuf,0x6E);
4378 4302 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4379 4303 }
4380 4304 { // PUNPCKLDQ $tmp,$tmp2
4381 4305 emit_opcode(cbuf,0x66);
4382 4306 emit_opcode(cbuf,0x0F);
4383 4307 emit_opcode(cbuf,0x62);
4384 4308 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4385 4309 }
4386 4310 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4387 4311 { // MOVSD $mem,$tmp ! atomic long store
4388 4312 emit_opcode(cbuf,0xF2);
4389 4313 emit_opcode(cbuf,0x0F);
4390 4314 emit_opcode(cbuf,0x11);
4391 4315 int base = $mem$$base;
4392 4316 int index = $mem$$index;
4393 4317 int scale = $mem$$scale;
4394 4318 int displace = $mem$$disp;
4395 4319 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4396 4320 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4397 4321 }
4398 4322 %}
4399 4323
4400 4324 // Safepoint Poll. This polls the safepoint page, and causes an
4401 4325 // exception if it is not readable. Unfortunately, it kills the condition code
4402 4326 // in the process
4403 4327 // We currently use TESTL [spp],EDI
4404 4328 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4405 4329
4406 4330 enc_class Safepoint_Poll() %{
  // Record a poll relocation at the instruction start so the VM can find
  // (and patch/interpret) this poll site.
4407 4331 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  // TEST [polling_page], EDI -- 0x85 with mod=0, reg=EDI, rm=5 (disp32):
  // a read of the polling page; faults when the page is protected.
4408 4332 emit_opcode(cbuf,0x85);
4409 4333 emit_rm (cbuf, 0x0, 0x7, 0x5);
4410 4334 emit_d32(cbuf, (intptr_t)os::get_polling_page());
4411 4335 %}
4412 4336 %}
4413 4337
4414 4338
4415 4339 //----------FRAME--------------------------------------------------------------
4416 4340 // Definition of frame structure and management information.
4417 4341 //
4418 4342 // S T A C K L A Y O U T Allocators stack-slot number
4419 4343 // | (to get allocators register number
4420 4344 // G Owned by | | v add OptoReg::stack0())
4421 4345 // r CALLER | |
4422 4346 // o | +--------+ pad to even-align allocators stack-slot
4423 4347 // w V | pad0 | numbers; owned by CALLER
4424 4348 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4425 4349 // h ^ | in | 5
4426 4350 // | | args | 4 Holes in incoming args owned by SELF
4427 4351 // | | | | 3
4428 4352 // | | +--------+
4429 4353 // V | | old out| Empty on Intel, window on Sparc
4430 4354 // | old |preserve| Must be even aligned.
4431 4355 // | SP-+--------+----> Matcher::_old_SP, even aligned
4432 4356 // | | in | 3 area for Intel ret address
4433 4357 // Owned by |preserve| Empty on Sparc.
4434 4358 // SELF +--------+
4435 4359 // | | pad2 | 2 pad to align old SP
4436 4360 // | +--------+ 1
4437 4361 // | | locks | 0
4438 4362 // | +--------+----> OptoReg::stack0(), even aligned
4439 4363 // | | pad1 | 11 pad to align new SP
4440 4364 // | +--------+
4441 4365 // | | | 10
4442 4366 // | | spills | 9 spills
4443 4367 // V | | 8 (pad0 slot for callee)
4444 4368 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4445 4369 // ^ | out | 7
4446 4370 // | | args | 6 Holes in outgoing args owned by CALLEE
4447 4371 // Owned by +--------+
4448 4372 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4449 4373 // | new |preserve| Must be even-aligned.
4450 4374 // | SP-+--------+----> Matcher::_new_SP, even aligned
4451 4375 // | | |
4452 4376 //
4453 4377 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4454 4378 // known from SELF's arguments and the Java calling convention.
4455 4379 // Region 6-7 is determined per call site.
4456 4380 // Note 2: If the calling convention leaves holes in the incoming argument
4457 4381 // area, those holes are owned by SELF. Holes in the outgoing area
 4458 4382 // are owned by the CALLEE. Holes should not be necessary in the
4459 4383 // incoming area, as the Java calling convention is completely under
4460 4384 // the control of the AD file. Doubles can be sorted and packed to
 4461 4385 // avoid holes. Holes in the outgoing arguments may be necessary for
4462 4386 // varargs C calling conventions.
4463 4387 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4464 4388 // even aligned with pad0 as needed.
4465 4389 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4466 4390 // region 6-11 is even aligned; it may be padded out more so that
4467 4391 // the region from SP to FP meets the minimum stack alignment.
4468 4392
4469 4393 frame %{
4470 4394 // What direction does stack grow in (assumed to be same for C & Java)
4471 4395 stack_direction(TOWARDS_LOW);
4472 4396
4473 4397 // These three registers define part of the calling convention
4474 4398 // between compiled code and the interpreter.
4475 4399 inline_cache_reg(EAX); // Inline Cache Register
4476 4400 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4477 4401
4478 4402 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4479 4403 cisc_spilling_operand_name(indOffset32);
4480 4404
4481 4405 // Number of stack slots consumed by locking an object
4482 4406 sync_stack_slots(1);
4483 4407
4484 4408 // Compiled code's Frame Pointer
4485 4409 frame_pointer(ESP);
4486 4410 // Interpreter stores its frame pointer in a register which is
4487 4411 // stored to the stack by I2CAdaptors.
4488 4412 // I2CAdaptors convert from interpreted java to compiled java.
4489 4413 interpreter_frame_pointer(EBP);
4490 4414
4491 4415 // Stack alignment requirement
4492 4416 // Alignment size in bytes (128-bit -> 16 bytes)
4493 4417 stack_alignment(StackAlignmentInBytes);
4494 4418
4495 4419 // Number of stack slots between incoming argument block and the start of
4496 4420 // a new frame. The PROLOG must add this many slots to the stack. The
4497 4421 // EPILOG must remove this many slots. Intel needs one slot for
4498 4422 // return address and one for rbp, (must save rbp)
4499 4423 in_preserve_stack_slots(2+VerifyStackAtCalls);
4500 4424
4501 4425 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4502 4426 // for calls to C. Supports the var-args backing area for register parms.
4503 4427 varargs_C_out_slots_killed(0);
4504 4428
4505 4429 // The after-PROLOG location of the return address. Location of
4506 4430 // return address specifies a type (REG or STACK) and a number
4507 4431 // representing the register number (i.e. - use a register name) or
4508 4432 // stack slot.
4509 4433 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4510 4434 // Otherwise, it is above the locks and verification slot and alignment word
4511 4435 return_addr(STACK - 1 +
4512 4436 round_to(1+VerifyStackAtCalls+
4513 4437 Compile::current()->fixed_slots(),
4514 4438 (StackAlignmentInBytes/wordSize)));
4515 4439
4516 4440 // Body of function which returns an integer array locating
4517 4441 // arguments either in registers or in stack slots. Passed an array
4518 4442 // of ideal registers called "sig" and a "length" count. Stack-slot
4519 4443 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4520 4444 // arguments for a CALLEE. Incoming stack arguments are
4521 4445 // automatically biased by the preserve_stack_slots field above.
4522 4446 calling_convention %{
 4523 4447 // No difference between incoming/outgoing just pass false
4524 4448 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4525 4449 %}
4526 4450
4527 4451
4528 4452 // Body of function which returns an integer array locating
4529 4453 // arguments either in registers or in stack slots. Passed an array
4530 4454 // of ideal registers called "sig" and a "length" count. Stack-slot
4531 4455 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4532 4456 // arguments for a CALLEE. Incoming stack arguments are
4533 4457 // automatically biased by the preserve_stack_slots field above.
4534 4458 c_calling_convention %{
4535 4459 // This is obviously always outgoing
4536 4460 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4537 4461 %}
4538 4462
4539 4463 // Location of C & interpreter return values
4540 4464 c_return_value %{
4541 4465 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4542 4466 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4543 4467 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4544 4468
4545 4469 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4546 4470 // that C functions return float and double results in XMM0.
4547 4471 if( ideal_reg == Op_RegD && UseSSE>=2 )
4548 4472 return OptoRegPair(XMM0b_num,XMM0a_num);
4549 4473 if( ideal_reg == Op_RegF && UseSSE>=2 )
4550 4474 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4551 4475
4552 4476 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4553 4477 %}
4554 4478
4555 4479 // Location of return values
4556 4480 return_value %{
4557 4481 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4558 4482 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4559 4483 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4560 4484 if( ideal_reg == Op_RegD && UseSSE>=2 )
4561 4485 return OptoRegPair(XMM0b_num,XMM0a_num);
4562 4486 if( ideal_reg == Op_RegF && UseSSE>=1 )
4563 4487 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4564 4488 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4565 4489 %}
4566 4490
4567 4491 %}
4568 4492
4569 4493 //----------ATTRIBUTES---------------------------------------------------------
4570 4494 //----------Operand Attributes-------------------------------------------------
4571 4495 op_attrib op_cost(0); // Required cost attribute
4572 4496
4573 4497 //----------Instruction Attributes---------------------------------------------
4574 4498 ins_attrib ins_cost(100); // Required cost attribute
4575 4499 ins_attrib ins_size(8); // Required size attribute (in bits)
4576 4500 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4577 4501 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4578 4502 // non-matching short branch variant of some
4579 4503 // long branch?
4580 4504 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4581 4505 // specifies the alignment that some part of the instruction (not
4582 4506 // necessarily the start) requires. If > 1, a compute_padding()
4583 4507 // function must be provided for the instruction
4584 4508
4585 4509 //----------OPERANDS-----------------------------------------------------------
4586 4510 // Operand definitions must precede instruction definitions for correct parsing
4587 4511 // in the ADLC because operands constitute user defined types which are used in
4588 4512 // instruction definitions.
4589 4513
4590 4514 //----------Simple Operands----------------------------------------------------
4591 4515 // Immediate Operands
4592 4516 // Integer Immediate
4593 4517 operand immI() %{
4594 4518 match(ConI);
4595 4519
4596 4520 op_cost(10);
4597 4521 format %{ %}
4598 4522 interface(CONST_INTER);
4599 4523 %}
4600 4524
4601 4525 // Constant for test vs zero
4602 4526 operand immI0() %{
4603 4527 predicate(n->get_int() == 0);
4604 4528 match(ConI);
4605 4529
4606 4530 op_cost(0);
4607 4531 format %{ %}
4608 4532 interface(CONST_INTER);
4609 4533 %}
4610 4534
4611 4535 // Constant for increment
4612 4536 operand immI1() %{
4613 4537 predicate(n->get_int() == 1);
4614 4538 match(ConI);
4615 4539
4616 4540 op_cost(0);
4617 4541 format %{ %}
4618 4542 interface(CONST_INTER);
4619 4543 %}
4620 4544
4621 4545 // Constant for decrement
4622 4546 operand immI_M1() %{
4623 4547 predicate(n->get_int() == -1);
4624 4548 match(ConI);
4625 4549
4626 4550 op_cost(0);
4627 4551 format %{ %}
4628 4552 interface(CONST_INTER);
4629 4553 %}
4630 4554
4631 4555 // Valid scale values for addressing modes
4632 4556 operand immI2() %{
4633 4557 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4634 4558 match(ConI);
4635 4559
4636 4560 format %{ %}
4637 4561 interface(CONST_INTER);
4638 4562 %}
4639 4563
4640 4564 operand immI8() %{
4641 4565 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4642 4566 match(ConI);
4643 4567
4644 4568 op_cost(5);
4645 4569 format %{ %}
4646 4570 interface(CONST_INTER);
4647 4571 %}
4648 4572
4649 4573 operand immI16() %{
4650 4574 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4651 4575 match(ConI);
4652 4576
4653 4577 op_cost(10);
4654 4578 format %{ %}
4655 4579 interface(CONST_INTER);
4656 4580 %}
4657 4581
4658 4582 // Constant for long shifts
4659 4583 operand immI_32() %{
4660 4584 predicate( n->get_int() == 32 );
4661 4585 match(ConI);
4662 4586
4663 4587 op_cost(0);
4664 4588 format %{ %}
4665 4589 interface(CONST_INTER);
4666 4590 %}
4667 4591
4668 4592 operand immI_1_31() %{
4669 4593 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4670 4594 match(ConI);
4671 4595
4672 4596 op_cost(0);
4673 4597 format %{ %}
4674 4598 interface(CONST_INTER);
4675 4599 %}
4676 4600
4677 4601 operand immI_32_63() %{
4678 4602 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4679 4603 match(ConI);
4680 4604 op_cost(0);
4681 4605
4682 4606 format %{ %}
4683 4607 interface(CONST_INTER);
4684 4608 %}
4685 4609
4686 4610 operand immI_1() %{
4687 4611 predicate( n->get_int() == 1 );
4688 4612 match(ConI);
4689 4613
4690 4614 op_cost(0);
4691 4615 format %{ %}
4692 4616 interface(CONST_INTER);
4693 4617 %}
4694 4618
4695 4619 operand immI_2() %{
4696 4620 predicate( n->get_int() == 2 );
4697 4621 match(ConI);
4698 4622
4699 4623 op_cost(0);
4700 4624 format %{ %}
4701 4625 interface(CONST_INTER);
4702 4626 %}
4703 4627
4704 4628 operand immI_3() %{
4705 4629 predicate( n->get_int() == 3 );
4706 4630 match(ConI);
4707 4631
4708 4632 op_cost(0);
4709 4633 format %{ %}
4710 4634 interface(CONST_INTER);
4711 4635 %}
4712 4636
4713 4637 // Pointer Immediate
4714 4638 operand immP() %{
4715 4639 match(ConP);
4716 4640
4717 4641 op_cost(10);
4718 4642 format %{ %}
4719 4643 interface(CONST_INTER);
4720 4644 %}
4721 4645
4722 4646 // NULL Pointer Immediate
4723 4647 operand immP0() %{
4724 4648 predicate( n->get_ptr() == 0 );
4725 4649 match(ConP);
4726 4650 op_cost(0);
4727 4651
4728 4652 format %{ %}
4729 4653 interface(CONST_INTER);
4730 4654 %}
4731 4655
4732 4656 // Long Immediate
4733 4657 operand immL() %{
4734 4658 match(ConL);
4735 4659
4736 4660 op_cost(20);
4737 4661 format %{ %}
4738 4662 interface(CONST_INTER);
4739 4663 %}
4740 4664
4741 4665 // Long Immediate zero
4742 4666 operand immL0() %{
4743 4667 predicate( n->get_long() == 0L );
4744 4668 match(ConL);
4745 4669 op_cost(0);
4746 4670
4747 4671 format %{ %}
4748 4672 interface(CONST_INTER);
4749 4673 %}
4750 4674
 4751 4675 // Long Immediate -1
4752 4676 operand immL_M1() %{
4753 4677 predicate( n->get_long() == -1L );
4754 4678 match(ConL);
4755 4679 op_cost(0);
4756 4680
4757 4681 format %{ %}
4758 4682 interface(CONST_INTER);
4759 4683 %}
4760 4684
4761 4685 // Long immediate from 0 to 127.
4762 4686 // Used for a shorter form of long mul by 10.
4763 4687 operand immL_127() %{
4764 4688 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4765 4689 match(ConL);
4766 4690 op_cost(0);
4767 4691
4768 4692 format %{ %}
4769 4693 interface(CONST_INTER);
4770 4694 %}
4771 4695
4772 4696 // Long Immediate: low 32-bit mask
4773 4697 operand immL_32bits() %{
4774 4698 predicate(n->get_long() == 0xFFFFFFFFL);
4775 4699 match(ConL);
4776 4700 op_cost(0);
4777 4701
4778 4702 format %{ %}
4779 4703 interface(CONST_INTER);
4780 4704 %}
4781 4705
 4782 4706 // Long Immediate: 32-bit signed value (fits in an int)
4783 4707 operand immL32() %{
4784 4708 predicate(n->get_long() == (int)(n->get_long()));
4785 4709 match(ConL);
4786 4710 op_cost(20);
4787 4711
4788 4712 format %{ %}
4789 4713 interface(CONST_INTER);
4790 4714 %}
4791 4715
 4792 4716 // Double Immediate zero
4793 4717 operand immD0() %{
↓ open down ↓ |
2684 lines elided |
↑ open up ↑ |
4794 4718 // Do additional (and counter-intuitive) test against NaN to work around VC++
4795 4719 // bug that generates code such that NaNs compare equal to 0.0
4796 4720 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4797 4721 match(ConD);
4798 4722
4799 4723 op_cost(5);
4800 4724 format %{ %}
4801 4725 interface(CONST_INTER);
4802 4726 %}
4803 4727
4804 -// Double Immediate
4728 +// Double Immediate one
4805 4729 operand immD1() %{
4806 4730 predicate( UseSSE<=1 && n->getd() == 1.0 );
4807 4731 match(ConD);
4808 4732
4809 4733 op_cost(5);
4810 4734 format %{ %}
4811 4735 interface(CONST_INTER);
4812 4736 %}
4813 4737
4814 4738 // Double Immediate
4815 4739 operand immD() %{
4816 4740 predicate(UseSSE<=1);
4817 4741 match(ConD);
4818 4742
4819 4743 op_cost(5);
4820 4744 format %{ %}
4821 4745 interface(CONST_INTER);
4822 4746 %}
4823 4747
4824 4748 operand immXD() %{
4825 4749 predicate(UseSSE>=2);
4826 4750 match(ConD);
4827 4751
4828 4752 op_cost(5);
4829 4753 format %{ %}
4830 4754 interface(CONST_INTER);
4831 4755 %}
4832 4756
4833 4757 // Double Immediate zero
4834 4758 operand immXD0() %{
4835 4759 // Do additional (and counter-intuitive) test against NaN to work around VC++
4836 4760 // bug that generates code such that NaNs compare equal to 0.0 AND do not
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
4837 4761 // compare equal to -0.0.
4838 4762 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4839 4763 match(ConD);
4840 4764
4841 4765 format %{ %}
4842 4766 interface(CONST_INTER);
4843 4767 %}
4844 4768
4845 4769 // Float Immediate zero
4846 4770 operand immF0() %{
4847 - predicate( UseSSE == 0 && n->getf() == 0.0 );
4771 + predicate(UseSSE == 0 && n->getf() == 0.0F);
4772 + match(ConF);
4773 +
4774 + op_cost(5);
4775 + format %{ %}
4776 + interface(CONST_INTER);
4777 +%}
4778 +
4779 +// Float Immediate one
4780 +operand immF1() %{
4781 + predicate(UseSSE == 0 && n->getf() == 1.0F);
4848 4782 match(ConF);
4849 4783
4850 4784 op_cost(5);
4851 4785 format %{ %}
4852 4786 interface(CONST_INTER);
4853 4787 %}
4854 4788
4855 4789 // Float Immediate
4856 4790 operand immF() %{
4857 4791 predicate( UseSSE == 0 );
4858 4792 match(ConF);
4859 4793
4860 4794 op_cost(5);
4861 4795 format %{ %}
4862 4796 interface(CONST_INTER);
4863 4797 %}
4864 4798
4865 4799 // Float Immediate
4866 4800 operand immXF() %{
4867 4801 predicate(UseSSE >= 1);
4868 4802 match(ConF);
4869 4803
4870 4804 op_cost(5);
4871 4805 format %{ %}
4872 4806 interface(CONST_INTER);
4873 4807 %}
4874 4808
4875 4809 // Float Immediate zero. Zero and not -0.0
4876 4810 operand immXF0() %{
4877 4811 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4878 4812 match(ConF);
4879 4813
4880 4814 op_cost(5);
4881 4815 format %{ %}
4882 4816 interface(CONST_INTER);
4883 4817 %}
4884 4818
4885 4819 // Immediates for special shifts (sign extend)
4886 4820
4887 4821 // Constants for increment
4888 4822 operand immI_16() %{
4889 4823 predicate( n->get_int() == 16 );
4890 4824 match(ConI);
4891 4825
4892 4826 format %{ %}
4893 4827 interface(CONST_INTER);
4894 4828 %}
4895 4829
4896 4830 operand immI_24() %{
4897 4831 predicate( n->get_int() == 24 );
4898 4832 match(ConI);
4899 4833
4900 4834 format %{ %}
4901 4835 interface(CONST_INTER);
4902 4836 %}
4903 4837
4904 4838 // Constant for byte-wide masking
4905 4839 operand immI_255() %{
4906 4840 predicate( n->get_int() == 255 );
4907 4841 match(ConI);
4908 4842
4909 4843 format %{ %}
4910 4844 interface(CONST_INTER);
4911 4845 %}
4912 4846
4913 4847 // Constant for short-wide masking
4914 4848 operand immI_65535() %{
4915 4849 predicate(n->get_int() == 65535);
4916 4850 match(ConI);
4917 4851
4918 4852 format %{ %}
4919 4853 interface(CONST_INTER);
4920 4854 %}
4921 4855
4922 4856 // Register Operands
4923 4857 // Integer Register
4924 4858 operand eRegI() %{
4925 4859 constraint(ALLOC_IN_RC(e_reg));
4926 4860 match(RegI);
4927 4861 match(xRegI);
4928 4862 match(eAXRegI);
4929 4863 match(eBXRegI);
4930 4864 match(eCXRegI);
4931 4865 match(eDXRegI);
4932 4866 match(eDIRegI);
4933 4867 match(eSIRegI);
4934 4868
4935 4869 format %{ %}
4936 4870 interface(REG_INTER);
4937 4871 %}
4938 4872
4939 4873 // Subset of Integer Register
4940 4874 operand xRegI(eRegI reg) %{
4941 4875 constraint(ALLOC_IN_RC(x_reg));
4942 4876 match(reg);
4943 4877 match(eAXRegI);
4944 4878 match(eBXRegI);
4945 4879 match(eCXRegI);
4946 4880 match(eDXRegI);
4947 4881
4948 4882 format %{ %}
4949 4883 interface(REG_INTER);
4950 4884 %}
4951 4885
4952 4886 // Special Registers
4953 4887 operand eAXRegI(xRegI reg) %{
4954 4888 constraint(ALLOC_IN_RC(eax_reg));
4955 4889 match(reg);
4956 4890 match(eRegI);
4957 4891
4958 4892 format %{ "EAX" %}
4959 4893 interface(REG_INTER);
4960 4894 %}
4961 4895
4962 4896 // Special Registers
4963 4897 operand eBXRegI(xRegI reg) %{
4964 4898 constraint(ALLOC_IN_RC(ebx_reg));
4965 4899 match(reg);
4966 4900 match(eRegI);
4967 4901
4968 4902 format %{ "EBX" %}
4969 4903 interface(REG_INTER);
4970 4904 %}
4971 4905
4972 4906 operand eCXRegI(xRegI reg) %{
4973 4907 constraint(ALLOC_IN_RC(ecx_reg));
4974 4908 match(reg);
4975 4909 match(eRegI);
4976 4910
4977 4911 format %{ "ECX" %}
4978 4912 interface(REG_INTER);
4979 4913 %}
4980 4914
4981 4915 operand eDXRegI(xRegI reg) %{
4982 4916 constraint(ALLOC_IN_RC(edx_reg));
4983 4917 match(reg);
4984 4918 match(eRegI);
4985 4919
4986 4920 format %{ "EDX" %}
4987 4921 interface(REG_INTER);
4988 4922 %}
4989 4923
4990 4924 operand eDIRegI(xRegI reg) %{
4991 4925 constraint(ALLOC_IN_RC(edi_reg));
4992 4926 match(reg);
4993 4927 match(eRegI);
4994 4928
4995 4929 format %{ "EDI" %}
4996 4930 interface(REG_INTER);
4997 4931 %}
4998 4932
4999 4933 operand naxRegI() %{
5000 4934 constraint(ALLOC_IN_RC(nax_reg));
5001 4935 match(RegI);
5002 4936 match(eCXRegI);
5003 4937 match(eDXRegI);
5004 4938 match(eSIRegI);
5005 4939 match(eDIRegI);
5006 4940
5007 4941 format %{ %}
5008 4942 interface(REG_INTER);
5009 4943 %}
5010 4944
5011 4945 operand nadxRegI() %{
5012 4946 constraint(ALLOC_IN_RC(nadx_reg));
5013 4947 match(RegI);
5014 4948 match(eBXRegI);
5015 4949 match(eCXRegI);
5016 4950 match(eSIRegI);
5017 4951 match(eDIRegI);
5018 4952
5019 4953 format %{ %}
5020 4954 interface(REG_INTER);
5021 4955 %}
5022 4956
5023 4957 operand ncxRegI() %{
5024 4958 constraint(ALLOC_IN_RC(ncx_reg));
5025 4959 match(RegI);
5026 4960 match(eAXRegI);
5027 4961 match(eDXRegI);
5028 4962 match(eSIRegI);
5029 4963 match(eDIRegI);
5030 4964
5031 4965 format %{ %}
5032 4966 interface(REG_INTER);
5033 4967 %}
5034 4968
5035 4969 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
5036 4970 // //
5037 4971 operand eSIRegI(xRegI reg) %{
5038 4972 constraint(ALLOC_IN_RC(esi_reg));
5039 4973 match(reg);
5040 4974 match(eRegI);
5041 4975
5042 4976 format %{ "ESI" %}
5043 4977 interface(REG_INTER);
5044 4978 %}
5045 4979
5046 4980 // Pointer Register
5047 4981 operand anyRegP() %{
5048 4982 constraint(ALLOC_IN_RC(any_reg));
5049 4983 match(RegP);
5050 4984 match(eAXRegP);
5051 4985 match(eBXRegP);
5052 4986 match(eCXRegP);
5053 4987 match(eDIRegP);
5054 4988 match(eRegP);
5055 4989
5056 4990 format %{ %}
5057 4991 interface(REG_INTER);
5058 4992 %}
5059 4993
5060 4994 operand eRegP() %{
5061 4995 constraint(ALLOC_IN_RC(e_reg));
5062 4996 match(RegP);
5063 4997 match(eAXRegP);
5064 4998 match(eBXRegP);
5065 4999 match(eCXRegP);
5066 5000 match(eDIRegP);
5067 5001
5068 5002 format %{ %}
5069 5003 interface(REG_INTER);
5070 5004 %}
5071 5005
5072 5006 // On windows95, EBP is not safe to use for implicit null tests.
5073 5007 operand eRegP_no_EBP() %{
5074 5008 constraint(ALLOC_IN_RC(e_reg_no_rbp));
5075 5009 match(RegP);
5076 5010 match(eAXRegP);
5077 5011 match(eBXRegP);
5078 5012 match(eCXRegP);
5079 5013 match(eDIRegP);
5080 5014
5081 5015 op_cost(100);
5082 5016 format %{ %}
5083 5017 interface(REG_INTER);
5084 5018 %}
5085 5019
5086 5020 operand naxRegP() %{
5087 5021 constraint(ALLOC_IN_RC(nax_reg));
5088 5022 match(RegP);
5089 5023 match(eBXRegP);
5090 5024 match(eDXRegP);
5091 5025 match(eCXRegP);
5092 5026 match(eSIRegP);
5093 5027 match(eDIRegP);
5094 5028
5095 5029 format %{ %}
5096 5030 interface(REG_INTER);
5097 5031 %}
5098 5032
5099 5033 operand nabxRegP() %{
5100 5034 constraint(ALLOC_IN_RC(nabx_reg));
5101 5035 match(RegP);
5102 5036 match(eCXRegP);
5103 5037 match(eDXRegP);
5104 5038 match(eSIRegP);
5105 5039 match(eDIRegP);
5106 5040
5107 5041 format %{ %}
5108 5042 interface(REG_INTER);
5109 5043 %}
5110 5044
5111 5045 operand pRegP() %{
5112 5046 constraint(ALLOC_IN_RC(p_reg));
5113 5047 match(RegP);
5114 5048 match(eBXRegP);
5115 5049 match(eDXRegP);
5116 5050 match(eSIRegP);
5117 5051 match(eDIRegP);
5118 5052
5119 5053 format %{ %}
5120 5054 interface(REG_INTER);
5121 5055 %}
5122 5056
5123 5057 // Special Registers
5124 5058 // Return a pointer value
5125 5059 operand eAXRegP(eRegP reg) %{
5126 5060 constraint(ALLOC_IN_RC(eax_reg));
5127 5061 match(reg);
5128 5062 format %{ "EAX" %}
5129 5063 interface(REG_INTER);
5130 5064 %}
5131 5065
5132 5066 // Used in AtomicAdd
5133 5067 operand eBXRegP(eRegP reg) %{
5134 5068 constraint(ALLOC_IN_RC(ebx_reg));
5135 5069 match(reg);
5136 5070 format %{ "EBX" %}
5137 5071 interface(REG_INTER);
5138 5072 %}
5139 5073
5140 5074 // Tail-call (interprocedural jump) to interpreter
5141 5075 operand eCXRegP(eRegP reg) %{
5142 5076 constraint(ALLOC_IN_RC(ecx_reg));
5143 5077 match(reg);
5144 5078 format %{ "ECX" %}
5145 5079 interface(REG_INTER);
5146 5080 %}
5147 5081
5148 5082 operand eSIRegP(eRegP reg) %{
5149 5083 constraint(ALLOC_IN_RC(esi_reg));
5150 5084 match(reg);
5151 5085 format %{ "ESI" %}
5152 5086 interface(REG_INTER);
5153 5087 %}
5154 5088
5155 5089 // Used in rep stosw
5156 5090 operand eDIRegP(eRegP reg) %{
5157 5091 constraint(ALLOC_IN_RC(edi_reg));
5158 5092 match(reg);
5159 5093 format %{ "EDI" %}
5160 5094 interface(REG_INTER);
5161 5095 %}
5162 5096
5163 5097 operand eBPRegP() %{
5164 5098 constraint(ALLOC_IN_RC(ebp_reg));
5165 5099 match(RegP);
5166 5100 format %{ "EBP" %}
5167 5101 interface(REG_INTER);
5168 5102 %}
5169 5103
5170 5104 operand eRegL() %{
5171 5105 constraint(ALLOC_IN_RC(long_reg));
5172 5106 match(RegL);
5173 5107 match(eADXRegL);
5174 5108
5175 5109 format %{ %}
5176 5110 interface(REG_INTER);
5177 5111 %}
5178 5112
5179 5113 operand eADXRegL( eRegL reg ) %{
5180 5114 constraint(ALLOC_IN_RC(eadx_reg));
5181 5115 match(reg);
5182 5116
5183 5117 format %{ "EDX:EAX" %}
5184 5118 interface(REG_INTER);
5185 5119 %}
5186 5120
5187 5121 operand eBCXRegL( eRegL reg ) %{
5188 5122 constraint(ALLOC_IN_RC(ebcx_reg));
5189 5123 match(reg);
5190 5124
5191 5125 format %{ "EBX:ECX" %}
5192 5126 interface(REG_INTER);
5193 5127 %}
5194 5128
5195 5129 // Special case for integer high multiply
5196 5130 operand eADXRegL_low_only() %{
5197 5131 constraint(ALLOC_IN_RC(eadx_reg));
5198 5132 match(RegL);
5199 5133
5200 5134 format %{ "EAX" %}
5201 5135 interface(REG_INTER);
5202 5136 %}
5203 5137
5204 5138 // Flags register, used as output of compare instructions
5205 5139 operand eFlagsReg() %{
5206 5140 constraint(ALLOC_IN_RC(int_flags));
5207 5141 match(RegFlags);
5208 5142
5209 5143 format %{ "EFLAGS" %}
5210 5144 interface(REG_INTER);
5211 5145 %}
5212 5146
5213 5147 // Flags register, used as output of FLOATING POINT compare instructions
5214 5148 operand eFlagsRegU() %{
5215 5149 constraint(ALLOC_IN_RC(int_flags));
5216 5150 match(RegFlags);
5217 5151
5218 5152 format %{ "EFLAGS_U" %}
5219 5153 interface(REG_INTER);
5220 5154 %}
5221 5155
5222 5156 operand eFlagsRegUCF() %{
5223 5157 constraint(ALLOC_IN_RC(int_flags));
5224 5158 match(RegFlags);
5225 5159 predicate(false);
5226 5160
5227 5161 format %{ "EFLAGS_U_CF" %}
5228 5162 interface(REG_INTER);
5229 5163 %}
5230 5164
5231 5165 // Condition Code Register used by long compare
5232 5166 operand flagsReg_long_LTGE() %{
5233 5167 constraint(ALLOC_IN_RC(int_flags));
5234 5168 match(RegFlags);
5235 5169 format %{ "FLAGS_LTGE" %}
5236 5170 interface(REG_INTER);
5237 5171 %}
5238 5172 operand flagsReg_long_EQNE() %{
5239 5173 constraint(ALLOC_IN_RC(int_flags));
5240 5174 match(RegFlags);
5241 5175 format %{ "FLAGS_EQNE" %}
5242 5176 interface(REG_INTER);
5243 5177 %}
5244 5178 operand flagsReg_long_LEGT() %{
5245 5179 constraint(ALLOC_IN_RC(int_flags));
5246 5180 match(RegFlags);
5247 5181 format %{ "FLAGS_LEGT" %}
5248 5182 interface(REG_INTER);
5249 5183 %}
5250 5184
5251 5185 // Float register operands
5252 5186 operand regD() %{
5253 5187 predicate( UseSSE < 2 );
5254 5188 constraint(ALLOC_IN_RC(dbl_reg));
5255 5189 match(RegD);
5256 5190 match(regDPR1);
5257 5191 match(regDPR2);
5258 5192 format %{ %}
5259 5193 interface(REG_INTER);
5260 5194 %}
5261 5195
5262 5196 operand regDPR1(regD reg) %{
5263 5197 predicate( UseSSE < 2 );
5264 5198 constraint(ALLOC_IN_RC(dbl_reg0));
5265 5199 match(reg);
5266 5200 format %{ "FPR1" %}
5267 5201 interface(REG_INTER);
5268 5202 %}
5269 5203
5270 5204 operand regDPR2(regD reg) %{
5271 5205 predicate( UseSSE < 2 );
5272 5206 constraint(ALLOC_IN_RC(dbl_reg1));
5273 5207 match(reg);
5274 5208 format %{ "FPR2" %}
5275 5209 interface(REG_INTER);
5276 5210 %}
5277 5211
5278 5212 operand regnotDPR1(regD reg) %{
5279 5213 predicate( UseSSE < 2 );
5280 5214 constraint(ALLOC_IN_RC(dbl_notreg0));
5281 5215 match(reg);
5282 5216 format %{ %}
5283 5217 interface(REG_INTER);
5284 5218 %}
5285 5219
5286 5220 // XMM Double register operands
5287 5221 operand regXD() %{
5288 5222 predicate( UseSSE>=2 );
5289 5223 constraint(ALLOC_IN_RC(xdb_reg));
5290 5224 match(RegD);
5291 5225 match(regXD6);
5292 5226 match(regXD7);
5293 5227 format %{ %}
5294 5228 interface(REG_INTER);
5295 5229 %}
5296 5230
5297 5231 // XMM6 double register operands
5298 5232 operand regXD6(regXD reg) %{
5299 5233 predicate( UseSSE>=2 );
5300 5234 constraint(ALLOC_IN_RC(xdb_reg6));
5301 5235 match(reg);
5302 5236 format %{ "XMM6" %}
5303 5237 interface(REG_INTER);
5304 5238 %}
5305 5239
5306 5240 // XMM7 double register operands
5307 5241 operand regXD7(regXD reg) %{
5308 5242 predicate( UseSSE>=2 );
5309 5243 constraint(ALLOC_IN_RC(xdb_reg7));
5310 5244 match(reg);
5311 5245 format %{ "XMM7" %}
5312 5246 interface(REG_INTER);
5313 5247 %}
5314 5248
5315 5249 // Float register operands
5316 5250 operand regF() %{
5317 5251 predicate( UseSSE < 2 );
5318 5252 constraint(ALLOC_IN_RC(flt_reg));
5319 5253 match(RegF);
5320 5254 match(regFPR1);
5321 5255 format %{ %}
5322 5256 interface(REG_INTER);
5323 5257 %}
5324 5258
5325 5259 // Float register operands
5326 5260 operand regFPR1(regF reg) %{
5327 5261 predicate( UseSSE < 2 );
5328 5262 constraint(ALLOC_IN_RC(flt_reg0));
5329 5263 match(reg);
5330 5264 format %{ "FPR1" %}
5331 5265 interface(REG_INTER);
5332 5266 %}
5333 5267
5334 5268 // XMM register operands
5335 5269 operand regX() %{
5336 5270 predicate( UseSSE>=1 );
5337 5271 constraint(ALLOC_IN_RC(xmm_reg));
5338 5272 match(RegF);
5339 5273 format %{ %}
5340 5274 interface(REG_INTER);
5341 5275 %}
5342 5276
5343 5277
5344 5278 //----------Memory Operands----------------------------------------------------
5345 5279 // Direct Memory Operand
5346 5280 operand direct(immP addr) %{
5347 5281 match(addr);
5348 5282
5349 5283 format %{ "[$addr]" %}
5350 5284 interface(MEMORY_INTER) %{
5351 5285 base(0xFFFFFFFF);
5352 5286 index(0x4);
5353 5287 scale(0x0);
5354 5288 disp($addr);
5355 5289 %}
5356 5290 %}
5357 5291
5358 5292 // Indirect Memory Operand
5359 5293 operand indirect(eRegP reg) %{
5360 5294 constraint(ALLOC_IN_RC(e_reg));
5361 5295 match(reg);
5362 5296
5363 5297 format %{ "[$reg]" %}
5364 5298 interface(MEMORY_INTER) %{
5365 5299 base($reg);
5366 5300 index(0x4);
5367 5301 scale(0x0);
5368 5302 disp(0x0);
5369 5303 %}
5370 5304 %}
5371 5305
5372 5306 // Indirect Memory Plus Short Offset Operand
5373 5307 operand indOffset8(eRegP reg, immI8 off) %{
5374 5308 match(AddP reg off);
5375 5309
5376 5310 format %{ "[$reg + $off]" %}
5377 5311 interface(MEMORY_INTER) %{
5378 5312 base($reg);
5379 5313 index(0x4);
5380 5314 scale(0x0);
5381 5315 disp($off);
5382 5316 %}
5383 5317 %}
5384 5318
5385 5319 // Indirect Memory Plus Long Offset Operand
5386 5320 operand indOffset32(eRegP reg, immI off) %{
5387 5321 match(AddP reg off);
5388 5322
5389 5323 format %{ "[$reg + $off]" %}
5390 5324 interface(MEMORY_INTER) %{
5391 5325 base($reg);
5392 5326 index(0x4);
5393 5327 scale(0x0);
5394 5328 disp($off);
5395 5329 %}
5396 5330 %}
5397 5331
5398 5332 // Indirect Memory Plus Long Offset Operand
5399 5333 operand indOffset32X(eRegI reg, immP off) %{
5400 5334 match(AddP off reg);
5401 5335
5402 5336 format %{ "[$reg + $off]" %}
5403 5337 interface(MEMORY_INTER) %{
5404 5338 base($reg);
5405 5339 index(0x4);
5406 5340 scale(0x0);
5407 5341 disp($off);
5408 5342 %}
5409 5343 %}
5410 5344
5411 5345 // Indirect Memory Plus Index Register Plus Offset Operand
5412 5346 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5413 5347 match(AddP (AddP reg ireg) off);
5414 5348
5415 5349 op_cost(10);
5416 5350 format %{"[$reg + $off + $ireg]" %}
5417 5351 interface(MEMORY_INTER) %{
5418 5352 base($reg);
5419 5353 index($ireg);
5420 5354 scale(0x0);
5421 5355 disp($off);
5422 5356 %}
5423 5357 %}
5424 5358
5425 5359 // Indirect Memory Plus Index Register Plus Offset Operand
5426 5360 operand indIndex(eRegP reg, eRegI ireg) %{
5427 5361 match(AddP reg ireg);
5428 5362
5429 5363 op_cost(10);
5430 5364 format %{"[$reg + $ireg]" %}
5431 5365 interface(MEMORY_INTER) %{
5432 5366 base($reg);
5433 5367 index($ireg);
5434 5368 scale(0x0);
5435 5369 disp(0x0);
5436 5370 %}
5437 5371 %}
5438 5372
5439 5373 // // -------------------------------------------------------------------------
5440 5374 // // 486 architecture doesn't support "scale * index + offset" with out a base
5441 5375 // // -------------------------------------------------------------------------
5442 5376 // // Scaled Memory Operands
5443 5377 // // Indirect Memory Times Scale Plus Offset Operand
5444 5378 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5445 5379 // match(AddP off (LShiftI ireg scale));
5446 5380 //
5447 5381 // op_cost(10);
5448 5382 // format %{"[$off + $ireg << $scale]" %}
5449 5383 // interface(MEMORY_INTER) %{
5450 5384 // base(0x4);
5451 5385 // index($ireg);
5452 5386 // scale($scale);
5453 5387 // disp($off);
5454 5388 // %}
5455 5389 // %}
5456 5390
5457 5391 // Indirect Memory Times Scale Plus Index Register
5458 5392 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5459 5393 match(AddP reg (LShiftI ireg scale));
5460 5394
5461 5395 op_cost(10);
5462 5396 format %{"[$reg + $ireg << $scale]" %}
5463 5397 interface(MEMORY_INTER) %{
5464 5398 base($reg);
5465 5399 index($ireg);
5466 5400 scale($scale);
5467 5401 disp(0x0);
5468 5402 %}
5469 5403 %}
5470 5404
5471 5405 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5472 5406 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5473 5407 match(AddP (AddP reg (LShiftI ireg scale)) off);
5474 5408
5475 5409 op_cost(10);
5476 5410 format %{"[$reg + $off + $ireg << $scale]" %}
5477 5411 interface(MEMORY_INTER) %{
5478 5412 base($reg);
5479 5413 index($ireg);
5480 5414 scale($scale);
5481 5415 disp($off);
5482 5416 %}
5483 5417 %}
5484 5418
5485 5419 //----------Load Long Memory Operands------------------------------------------
5486 5420 // The load-long idiom will use its address expression again after loading
5487 5421 // the first word of the long. If the load-long destination overlaps with
5488 5422 // registers used in the addressing expression, the 2nd half will be loaded
5489 5423 // from a clobbered address. Fix this by requiring that load-long use
5490 5424 // address registers that do not overlap with the load-long target.
5491 5425
5492 5426 // load-long support
5493 5427 operand load_long_RegP() %{
5494 5428 constraint(ALLOC_IN_RC(esi_reg));
5495 5429 match(RegP);
5496 5430 match(eSIRegP);
5497 5431 op_cost(100);
5498 5432 format %{ %}
5499 5433 interface(REG_INTER);
5500 5434 %}
5501 5435
5502 5436 // Indirect Memory Operand Long
5503 5437 operand load_long_indirect(load_long_RegP reg) %{
5504 5438 constraint(ALLOC_IN_RC(esi_reg));
5505 5439 match(reg);
5506 5440
5507 5441 format %{ "[$reg]" %}
5508 5442 interface(MEMORY_INTER) %{
5509 5443 base($reg);
5510 5444 index(0x4);
5511 5445 scale(0x0);
5512 5446 disp(0x0);
5513 5447 %}
5514 5448 %}
5515 5449
5516 5450 // Indirect Memory Plus Long Offset Operand
5517 5451 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5518 5452 match(AddP reg off);
5519 5453
5520 5454 format %{ "[$reg + $off]" %}
5521 5455 interface(MEMORY_INTER) %{
5522 5456 base($reg);
5523 5457 index(0x4);
5524 5458 scale(0x0);
5525 5459 disp($off);
5526 5460 %}
5527 5461 %}
5528 5462
5529 5463 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5530 5464
5531 5465
5532 5466 //----------Special Memory Operands--------------------------------------------
5533 5467 // Stack Slot Operand - This operand is used for loading and storing temporary
5534 5468 // values on the stack where a match requires a value to
5535 5469 // flow through memory.
5536 5470 operand stackSlotP(sRegP reg) %{
5537 5471 constraint(ALLOC_IN_RC(stack_slots));
5538 5472 // No match rule because this operand is only generated in matching
5539 5473 format %{ "[$reg]" %}
5540 5474 interface(MEMORY_INTER) %{
5541 5475 base(0x4); // ESP
5542 5476 index(0x4); // No Index
5543 5477 scale(0x0); // No Scale
5544 5478 disp($reg); // Stack Offset
5545 5479 %}
5546 5480 %}
5547 5481
5548 5482 operand stackSlotI(sRegI reg) %{
5549 5483 constraint(ALLOC_IN_RC(stack_slots));
5550 5484 // No match rule because this operand is only generated in matching
5551 5485 format %{ "[$reg]" %}
5552 5486 interface(MEMORY_INTER) %{
5553 5487 base(0x4); // ESP
5554 5488 index(0x4); // No Index
5555 5489 scale(0x0); // No Scale
5556 5490 disp($reg); // Stack Offset
5557 5491 %}
5558 5492 %}
5559 5493
5560 5494 operand stackSlotF(sRegF reg) %{
5561 5495 constraint(ALLOC_IN_RC(stack_slots));
5562 5496 // No match rule because this operand is only generated in matching
5563 5497 format %{ "[$reg]" %}
5564 5498 interface(MEMORY_INTER) %{
5565 5499 base(0x4); // ESP
5566 5500 index(0x4); // No Index
5567 5501 scale(0x0); // No Scale
5568 5502 disp($reg); // Stack Offset
5569 5503 %}
5570 5504 %}
5571 5505
5572 5506 operand stackSlotD(sRegD reg) %{
5573 5507 constraint(ALLOC_IN_RC(stack_slots));
5574 5508 // No match rule because this operand is only generated in matching
5575 5509 format %{ "[$reg]" %}
5576 5510 interface(MEMORY_INTER) %{
5577 5511 base(0x4); // ESP
5578 5512 index(0x4); // No Index
5579 5513 scale(0x0); // No Scale
5580 5514 disp($reg); // Stack Offset
5581 5515 %}
5582 5516 %}
5583 5517
5584 5518 operand stackSlotL(sRegL reg) %{
5585 5519 constraint(ALLOC_IN_RC(stack_slots));
5586 5520 // No match rule because this operand is only generated in matching
5587 5521 format %{ "[$reg]" %}
5588 5522 interface(MEMORY_INTER) %{
5589 5523 base(0x4); // ESP
5590 5524 index(0x4); // No Index
5591 5525 scale(0x0); // No Scale
5592 5526 disp($reg); // Stack Offset
5593 5527 %}
5594 5528 %}
5595 5529
5596 5530 //----------Memory Operands - Win95 Implicit Null Variants----------------
5597 5531 // Indirect Memory Operand
5598 5532 operand indirect_win95_safe(eRegP_no_EBP reg)
5599 5533 %{
5600 5534 constraint(ALLOC_IN_RC(e_reg));
5601 5535 match(reg);
5602 5536
5603 5537 op_cost(100);
5604 5538 format %{ "[$reg]" %}
5605 5539 interface(MEMORY_INTER) %{
5606 5540 base($reg);
5607 5541 index(0x4);
5608 5542 scale(0x0);
5609 5543 disp(0x0);
5610 5544 %}
5611 5545 %}
5612 5546
5613 5547 // Indirect Memory Plus Short Offset Operand
5614 5548 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5615 5549 %{
5616 5550 match(AddP reg off);
5617 5551
5618 5552 op_cost(100);
5619 5553 format %{ "[$reg + $off]" %}
5620 5554 interface(MEMORY_INTER) %{
5621 5555 base($reg);
5622 5556 index(0x4);
5623 5557 scale(0x0);
5624 5558 disp($off);
5625 5559 %}
5626 5560 %}
5627 5561
5628 5562 // Indirect Memory Plus Long Offset Operand
5629 5563 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5630 5564 %{
5631 5565 match(AddP reg off);
5632 5566
5633 5567 op_cost(100);
5634 5568 format %{ "[$reg + $off]" %}
5635 5569 interface(MEMORY_INTER) %{
5636 5570 base($reg);
5637 5571 index(0x4);
5638 5572 scale(0x0);
5639 5573 disp($off);
5640 5574 %}
5641 5575 %}
5642 5576
5643 5577 // Indirect Memory Plus Index Register Plus Offset Operand
5644 5578 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5645 5579 %{
5646 5580 match(AddP (AddP reg ireg) off);
5647 5581
5648 5582 op_cost(100);
5649 5583 format %{"[$reg + $off + $ireg]" %}
5650 5584 interface(MEMORY_INTER) %{
5651 5585 base($reg);
5652 5586 index($ireg);
5653 5587 scale(0x0);
5654 5588 disp($off);
5655 5589 %}
5656 5590 %}
5657 5591
5658 5592 // Indirect Memory Times Scale Plus Index Register
5659 5593 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5660 5594 %{
5661 5595 match(AddP reg (LShiftI ireg scale));
5662 5596
5663 5597 op_cost(100);
5664 5598 format %{"[$reg + $ireg << $scale]" %}
5665 5599 interface(MEMORY_INTER) %{
5666 5600 base($reg);
5667 5601 index($ireg);
5668 5602 scale($scale);
5669 5603 disp(0x0);
5670 5604 %}
5671 5605 %}
5672 5606
5673 5607 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5674 5608 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5675 5609 %{
5676 5610 match(AddP (AddP reg (LShiftI ireg scale)) off);
5677 5611
5678 5612 op_cost(100);
5679 5613 format %{"[$reg + $off + $ireg << $scale]" %}
5680 5614 interface(MEMORY_INTER) %{
5681 5615 base($reg);
5682 5616 index($ireg);
5683 5617 scale($scale);
5684 5618 disp($off);
5685 5619 %}
5686 5620 %}
5687 5621
5688 5622 //----------Conditional Branch Operands----------------------------------------
5689 5623 // Comparison Op - This is the operation of the comparison, and is limited to
5690 5624 // the following set of codes:
5691 5625 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5692 5626 //
5693 5627 // Other attributes of the comparison, such as unsignedness, are specified
5694 5628 // by the comparison instruction that sets a condition code flags register.
5695 5629 // That result is represented by a flags operand whose subtype is appropriate
5696 5630 // to the unsignedness (etc.) of the comparison.
5697 5631 //
5698 5632 // Later, the instruction which matches both the Comparison Op (a Bool) and
5699 5633 // the flags (produced by the Cmp) specifies the coding of the comparison op
5700 5634 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5701 5635
5702 5636 // Comparison Code
5703 5637 operand cmpOp() %{
5704 5638 match(Bool);
5705 5639
5706 5640 format %{ "" %}
5707 5641 interface(COND_INTER) %{
5708 5642 equal(0x4, "e");
5709 5643 not_equal(0x5, "ne");
5710 5644 less(0xC, "l");
5711 5645 greater_equal(0xD, "ge");
5712 5646 less_equal(0xE, "le");
5713 5647 greater(0xF, "g");
5714 5648 %}
5715 5649 %}
5716 5650
5717 5651 // Comparison Code, unsigned compare. Used by FP also, with
5718 5652 // C2 (unordered) turned into GT or LT already. The other bits
5719 5653 // C0 and C3 are turned into Carry & Zero flags.
5720 5654 operand cmpOpU() %{
5721 5655 match(Bool);
5722 5656
5723 5657 format %{ "" %}
5724 5658 interface(COND_INTER) %{
5725 5659 equal(0x4, "e");
5726 5660 not_equal(0x5, "ne");
5727 5661 less(0x2, "b");
5728 5662 greater_equal(0x3, "nb");
5729 5663 less_equal(0x6, "be");
5730 5664 greater(0x7, "nbe");
5731 5665 %}
5732 5666 %}
5733 5667
5734 5668 // Floating comparisons that don't require any fixup for the unordered case
5735 5669 operand cmpOpUCF() %{
5736 5670 match(Bool);
5737 5671 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5738 5672 n->as_Bool()->_test._test == BoolTest::ge ||
5739 5673 n->as_Bool()->_test._test == BoolTest::le ||
5740 5674 n->as_Bool()->_test._test == BoolTest::gt);
5741 5675 format %{ "" %}
5742 5676 interface(COND_INTER) %{
5743 5677 equal(0x4, "e");
5744 5678 not_equal(0x5, "ne");
5745 5679 less(0x2, "b");
5746 5680 greater_equal(0x3, "nb");
5747 5681 less_equal(0x6, "be");
5748 5682 greater(0x7, "nbe");
5749 5683 %}
5750 5684 %}
5751 5685
5752 5686
5753 5687 // Floating comparisons that can be fixed up with extra conditional jumps
5754 5688 operand cmpOpUCF2() %{
5755 5689 match(Bool);
5756 5690 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5757 5691 n->as_Bool()->_test._test == BoolTest::eq);
5758 5692 format %{ "" %}
5759 5693 interface(COND_INTER) %{
5760 5694 equal(0x4, "e");
5761 5695 not_equal(0x5, "ne");
5762 5696 less(0x2, "b");
5763 5697 greater_equal(0x3, "nb");
5764 5698 less_equal(0x6, "be");
5765 5699 greater(0x7, "nbe");
5766 5700 %}
5767 5701 %}
5768 5702
5769 5703 // Comparison Code for FP conditional move
5770 5704 operand cmpOp_fcmov() %{
5771 5705 match(Bool);
5772 5706
5773 5707 format %{ "" %}
5774 5708 interface(COND_INTER) %{
5775 5709 equal (0x0C8);
5776 5710 not_equal (0x1C8);
5777 5711 less (0x0C0);
5778 5712 greater_equal(0x1C0);
5779 5713 less_equal (0x0D0);
5780 5714 greater (0x1D0);
5781 5715 %}
5782 5716 %}
5783 5717
5784 5718 // Comparison Code used in long compares
5785 5719 operand cmpOp_commute() %{
5786 5720 match(Bool);
5787 5721
5788 5722 format %{ "" %}
5789 5723 interface(COND_INTER) %{
5790 5724 equal(0x4, "e");
5791 5725 not_equal(0x5, "ne");
5792 5726 less(0xF, "g"); // relational codes are intentionally swapped (l<->g, le<->ge):
5793 5727 greater_equal(0xE, "le"); // this operand is used where the long-compare operands
5794 5728 less_equal(0xD, "ge"); // have been commuted, so each test maps to the x86
5795 5729 greater(0xC, "l"); // condition code of its mirror (compare with cmpOp above)
5796 5730 %}
5797 5731 %}
5798 5732
5799 5733 //----------OPERAND CLASSES----------------------------------------------------
5800 5734 // Operand Classes are groups of operands that are used to simplify
5801 5735 // instruction definitions by not requiring the AD writer to specify separate
5802 5736 // instructions for every form of operand when the instruction accepts
5803 5737 // multiple operand types with the same basic encoding and format. The classic
5804 5738 // case of this is memory operands.
5805 5739
5806 5740 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5807 5741 indIndex, indIndexScale, indIndexScaleOffset);
5808 5742
5809 5743 // Long memory operations are encoded in 2 instructions and a +4 offset.
5810 5744 // This means some kind of offset is always required and you cannot use
5811 5745 // an oop as the offset (done when working on static globals).
5812 5746 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5813 5747 indIndex, indIndexScale, indIndexScaleOffset);
5814 5748
5815 5749
5816 5750 //----------PIPELINE-----------------------------------------------------------
5817 5751 // Rules which define the behavior of the target architectures pipeline.
5818 5752 pipeline %{
5819 5753
5820 5754 //----------ATTRIBUTES---------------------------------------------------------
5821 5755 attributes %{
5822 5756 variable_size_instructions; // x86 instructions are variable-size (previous "Fixed size" comment was stale)
5823 5757 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5824 5758 instruction_unit_size = 1; // An instruction is 1 byte long
5825 5759 instruction_fetch_unit_size = 16; // The processor fetches one line
5826 5760 instruction_fetch_units = 1; // of 16 bytes
5827 5761
5828 5762 // List of nop instructions
5829 5763 nops( MachNop );
5830 5764 %}
5831 5765
5832 5766 //----------RESOURCES----------------------------------------------------------
5833 5767 // Resources are the functional units available to the machine
5834 5768
5835 5769 // Generic P2/P3 pipeline
5836 5770 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5837 5771 // 3 instructions decoded per cycle.
5838 5772 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5839 5773 // 2 ALU op, only ALU0 handles mul/div instructions.
5840 5774 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5841 5775 MS0, MS1, MEM = MS0 | MS1,
5842 5776 BR, FPU,
5843 5777 ALU0, ALU1, ALU = ALU0 | ALU1 );
5844 5778
5845 5779 //----------PIPELINE DESCRIPTION-----------------------------------------------
5846 5780 // Pipeline Description specifies the stages in the machine's pipeline
5847 5781
5848 5782 // Generic P2/P3 pipeline
5849 5783 pipe_desc(S0, S1, S2, S3, S4, S5);
5850 5784
5851 5785 //----------PIPELINE CLASSES---------------------------------------------------
5852 5786 // Pipeline Classes describe the stages in which input and output are
5853 5787 // referenced by the hardware pipeline.
5854 5788
5855 5789 // Naming convention: ialu or fpu
5856 5790 // Then: _reg
5857 5791 // Then: _reg if there is a 2nd register
5858 5792 // Then: _long if it's a pair of instructions implementing a long
5859 5793 // Then: _fat if it requires the big decoder
5860 5794 // Or: _mem if it requires the big decoder and a memory unit.
5861 5795
5862 5796 // Integer ALU reg operation
5863 5797 pipe_class ialu_reg(eRegI dst) %{
5864 5798 single_instruction;
5865 5799 dst : S4(write);
5866 5800 dst : S3(read);
5867 5801 DECODE : S0; // any decoder
5868 5802 ALU : S3; // any alu
5869 5803 %}
5870 5804
5871 5805 // Long ALU reg operation
5872 5806 pipe_class ialu_reg_long(eRegL dst) %{
5873 5807 instruction_count(2);
5874 5808 dst : S4(write);
5875 5809 dst : S3(read);
5876 5810 DECODE : S0(2); // any 2 decoders
5877 5811 ALU : S3(2); // both alus
5878 5812 %}
5879 5813
5880 5814 // Integer ALU reg operation using big decoder
5881 5815 pipe_class ialu_reg_fat(eRegI dst) %{
5882 5816 single_instruction;
5883 5817 dst : S4(write);
5884 5818 dst : S3(read);
5885 5819 D0 : S0; // big decoder only
5886 5820 ALU : S3; // any alu
5887 5821 %}
5888 5822
5889 5823 // Long ALU reg operation using big decoder
5890 5824 pipe_class ialu_reg_long_fat(eRegL dst) %{
5891 5825 instruction_count(2);
5892 5826 dst : S4(write);
5893 5827 dst : S3(read);
5894 5828 D0 : S0(2); // big decoder only; twice
5895 5829 ALU : S3(2); // any 2 alus
5896 5830 %}
5897 5831
5898 5832 // Integer ALU reg-reg operation
5899 5833 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5900 5834 single_instruction;
5901 5835 dst : S4(write);
5902 5836 src : S3(read);
5903 5837 DECODE : S0; // any decoder
5904 5838 ALU : S3; // any alu
5905 5839 %}
5906 5840
5907 5841 // Long ALU reg-reg operation
5908 5842 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5909 5843 instruction_count(2);
5910 5844 dst : S4(write);
5911 5845 src : S3(read);
5912 5846 DECODE : S0(2); // any 2 decoders
5913 5847 ALU : S3(2); // both alus
5914 5848 %}
5915 5849
5916 5850 // Integer ALU reg-reg operation
5917 5851 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5918 5852 single_instruction;
5919 5853 dst : S4(write);
5920 5854 src : S3(read);
5921 5855 D0 : S0; // big decoder only
5922 5856 ALU : S3; // any alu
5923 5857 %}
5924 5858
5925 5859 // Long ALU reg-reg operation
5926 5860 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5927 5861 instruction_count(2);
5928 5862 dst : S4(write);
5929 5863 src : S3(read);
5930 5864 D0 : S0(2); // big decoder only; twice
5931 5865 ALU : S3(2); // both alus
5932 5866 %}
5933 5867
5934 5868 // Integer ALU reg-mem operation
5935 5869 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5936 5870 single_instruction;
5937 5871 dst : S5(write);
5938 5872 mem : S3(read);
5939 5873 D0 : S0; // big decoder only
5940 5874 ALU : S4; // any alu
5941 5875 MEM : S3; // any mem
5942 5876 %}
5943 5877
5944 5878 // Long ALU reg-mem operation
5945 5879 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5946 5880 instruction_count(2);
5947 5881 dst : S5(write);
5948 5882 mem : S3(read);
5949 5883 D0 : S0(2); // big decoder only; twice
5950 5884 ALU : S4(2); // any 2 alus
5951 5885 MEM : S3(2); // both mems
5952 5886 %}
5953 5887
5954 5888 // Integer mem operation (prefetch)
5955 5889 pipe_class ialu_mem(memory mem)
5956 5890 %{
5957 5891 single_instruction;
5958 5892 mem : S3(read);
5959 5893 D0 : S0; // big decoder only
5960 5894 MEM : S3; // any mem
5961 5895 %}
5962 5896
5963 5897 // Integer Store to Memory
5964 5898 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5965 5899 single_instruction;
5966 5900 mem : S3(read);
5967 5901 src : S5(read);
5968 5902 D0 : S0; // big decoder only
5969 5903 ALU : S4; // any alu
5970 5904 MEM : S3;
5971 5905 %}
5972 5906
5973 5907 // Long Store to Memory
5974 5908 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5975 5909 instruction_count(2);
5976 5910 mem : S3(read);
5977 5911 src : S5(read);
5978 5912 D0 : S0(2); // big decoder only; twice
5979 5913 ALU : S4(2); // any 2 alus
5980 5914 MEM : S3(2); // Both mems
5981 5915 %}
5982 5916
5983 5917 // Integer Store to Memory
5984 5918 pipe_class ialu_mem_imm(memory mem) %{
5985 5919 single_instruction;
5986 5920 mem : S3(read);
5987 5921 D0 : S0; // big decoder only
5988 5922 ALU : S4; // any alu
5989 5923 MEM : S3;
5990 5924 %}
5991 5925
5992 5926 // Integer ALU0 reg-reg operation
5993 5927 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5994 5928 single_instruction;
5995 5929 dst : S4(write);
5996 5930 src : S3(read);
5997 5931 D0 : S0; // Big decoder only
5998 5932 ALU0 : S3; // only alu0
5999 5933 %}
6000 5934
6001 5935 // Integer ALU0 reg-mem operation
6002 5936 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
6003 5937 single_instruction;
6004 5938 dst : S5(write);
6005 5939 mem : S3(read);
6006 5940 D0 : S0; // big decoder only
6007 5941 ALU0 : S4; // ALU0 only
6008 5942 MEM : S3; // any mem
6009 5943 %}
6010 5944
6011 5945 // Integer ALU reg-reg operation
6012 5946 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
6013 5947 single_instruction;
6014 5948 cr : S4(write);
6015 5949 src1 : S3(read);
6016 5950 src2 : S3(read);
6017 5951 DECODE : S0; // any decoder
6018 5952 ALU : S3; // any alu
6019 5953 %}
6020 5954
6021 5955 // Integer ALU reg-imm operation
6022 5956 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
6023 5957 single_instruction;
6024 5958 cr : S4(write);
6025 5959 src1 : S3(read);
6026 5960 DECODE : S0; // any decoder
6027 5961 ALU : S3; // any alu
6028 5962 %}
6029 5963
6030 5964 // Integer ALU reg-mem operation
6031 5965 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
6032 5966 single_instruction;
6033 5967 cr : S4(write);
6034 5968 src1 : S3(read);
6035 5969 src2 : S3(read);
6036 5970 D0 : S0; // big decoder only
6037 5971 ALU : S4; // any alu
6038 5972 MEM : S3;
6039 5973 %}
6040 5974
6041 5975 // Conditional move reg-reg
6042 5976 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
6043 5977 instruction_count(4);
6044 5978 y : S4(read);
6045 5979 q : S3(read);
6046 5980 p : S3(read);
6047 5981 DECODE : S0(4); // any decoder
6048 5982 %}
6049 5983
6050 5984 // Conditional move reg-reg
6051 5985 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
6052 5986 single_instruction;
6053 5987 dst : S4(write);
6054 5988 src : S3(read);
6055 5989 cr : S3(read);
6056 5990 DECODE : S0; // any decoder
6057 5991 %}
6058 5992
6059 5993 // Conditional move reg-mem
6060 5994 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
6061 5995 single_instruction;
6062 5996 dst : S4(write);
6063 5997 src : S3(read);
6064 5998 cr : S3(read);
6065 5999 DECODE : S0; // any decoder
6066 6000 MEM : S3;
6067 6001 %}
6068 6002
6069 6003 // Conditional move reg-reg long
6070 6004 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6071 6005 single_instruction;
6072 6006 dst : S4(write);
6073 6007 src : S3(read);
6074 6008 cr : S3(read);
6075 6009 DECODE : S0(2); // any 2 decoders
6076 6010 %}
6077 6011
6078 6012 // Conditional move double reg-reg
6079 6013 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6080 6014 single_instruction;
6081 6015 dst : S4(write);
6082 6016 src : S3(read);
6083 6017 cr : S3(read);
6084 6018 DECODE : S0; // any decoder
6085 6019 %}
6086 6020
6087 6021 // Float reg-reg operation
6088 6022 pipe_class fpu_reg(regD dst) %{
6089 6023 instruction_count(2);
6090 6024 dst : S3(read);
6091 6025 DECODE : S0(2); // any 2 decoders
6092 6026 FPU : S3;
6093 6027 %}
6094 6028
6095 6029 // Float reg-reg operation
6096 6030 pipe_class fpu_reg_reg(regD dst, regD src) %{
6097 6031 instruction_count(2);
6098 6032 dst : S4(write);
6099 6033 src : S3(read);
6100 6034 DECODE : S0(2); // any 2 decoders
6101 6035 FPU : S3;
6102 6036 %}
6103 6037
6104 6038 // Float reg-reg operation
6105 6039 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6106 6040 instruction_count(3);
6107 6041 dst : S4(write);
6108 6042 src1 : S3(read);
6109 6043 src2 : S3(read);
6110 6044 DECODE : S0(3); // any 3 decoders
6111 6045 FPU : S3(2);
6112 6046 %}
6113 6047
6114 6048 // Float reg-reg operation
6115 6049 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6116 6050 instruction_count(4);
6117 6051 dst : S4(write);
6118 6052 src1 : S3(read);
6119 6053 src2 : S3(read);
6120 6054 src3 : S3(read);
6121 6055 DECODE : S0(4); // any 4 decoders (matches instruction_count; comment previously said 3)
6122 6056 FPU : S3(2);
6123 6057 %}
6124 6058
6125 6059 // Float reg-reg operation
6126 6060 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6127 6061 instruction_count(4);
6128 6062 dst : S4(write);
6129 6063 src1 : S3(read);
6130 6064 src2 : S3(read);
6131 6065 src3 : S3(read);
6132 6066 DECODE : S1(3); // any 3 decoders
6133 6067 D0 : S0; // Big decoder only
6134 6068 FPU : S3(2);
6135 6069 MEM : S3;
6136 6070 %}
6137 6071
6138 6072 // Float reg-mem operation
6139 6073 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6140 6074 instruction_count(2);
6141 6075 dst : S5(write);
6142 6076 mem : S3(read);
6143 6077 D0 : S0; // big decoder only
6144 6078 DECODE : S1; // any decoder for FPU POP
6145 6079 FPU : S4;
6146 6080 MEM : S3; // any mem
6147 6081 %}
6148 6082
6149 6083 // Float reg-mem operation
6150 6084 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6151 6085 instruction_count(3);
6152 6086 dst : S5(write);
6153 6087 src1 : S3(read);
6154 6088 mem : S3(read);
6155 6089 D0 : S0; // big decoder only
6156 6090 DECODE : S1(2); // any decoder for FPU POP
6157 6091 FPU : S4;
6158 6092 MEM : S3; // any mem
6159 6093 %}
6160 6094
6161 6095 // Float mem-reg operation
6162 6096 pipe_class fpu_mem_reg(memory mem, regD src) %{
6163 6097 instruction_count(2);
6164 6098 src : S5(read);
6165 6099 mem : S3(read);
6166 6100 DECODE : S0; // any decoder for FPU PUSH
6167 6101 D0 : S1; // big decoder only
6168 6102 FPU : S4;
6169 6103 MEM : S3; // any mem
6170 6104 %}
6171 6105
6172 6106 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6173 6107 instruction_count(3);
6174 6108 src1 : S3(read);
6175 6109 src2 : S3(read);
6176 6110 mem : S3(read);
6177 6111 DECODE : S0(2); // any decoder for FPU PUSH
6178 6112 D0 : S1; // big decoder only
6179 6113 FPU : S4;
6180 6114 MEM : S3; // any mem
6181 6115 %}
6182 6116
6183 6117 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6184 6118 instruction_count(3);
6185 6119 src1 : S3(read);
6186 6120 src2 : S3(read);
6187 6121 mem : S4(read);
6188 6122 DECODE : S0; // any decoder for FPU PUSH
6189 6123 D0 : S0(2); // big decoder only
6190 6124 FPU : S4;
6191 6125 MEM : S3(2); // any mem
6192 6126 %}
6193 6127
6194 6128 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6195 6129 instruction_count(2);
6196 6130 src1 : S3(read);
6197 6131 dst : S4(read);
6198 6132 D0 : S0(2); // big decoder only
6199 6133 MEM : S3(2); // any mem
6200 6134 %}
6201 6135
6202 6136 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6203 6137 instruction_count(3);
6204 6138 src1 : S3(read);
6205 6139 src2 : S3(read);
6206 6140 dst : S4(read);
6207 6141 D0 : S0(3); // big decoder only
6208 6142 FPU : S4;
6209 6143 MEM : S3(3); // any mem
6210 6144 %}
6211 6145
6212 6146 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6213 6147 instruction_count(3);
6214 6148 src1 : S4(read);
6215 6149 mem : S4(read);
6216 6150 DECODE : S0; // any decoder for FPU PUSH
6217 6151 D0 : S0(2); // big decoder only
6218 6152 FPU : S4;
6219 6153 MEM : S3(2); // any mem
6220 6154 %}
6221 6155
6222 6156 // Float load constant
6223 6157 pipe_class fpu_reg_con(regD dst) %{
6224 6158 instruction_count(2);
6225 6159 dst : S5(write);
6226 6160 D0 : S0; // big decoder only for the load
6227 6161 DECODE : S1; // any decoder for FPU POP
6228 6162 FPU : S4;
6229 6163 MEM : S3; // any mem
6230 6164 %}
6231 6165
6232 6166 // Float load constant
6233 6167 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6234 6168 instruction_count(3);
6235 6169 dst : S5(write);
6236 6170 src : S3(read);
6237 6171 D0 : S0; // big decoder only for the load
6238 6172 DECODE : S1(2); // any decoder for FPU POP
6239 6173 FPU : S4;
6240 6174 MEM : S3; // any mem
6241 6175 %}
6242 6176
6243 6177 // UnConditional branch
6244 6178 pipe_class pipe_jmp( label labl ) %{
6245 6179 single_instruction;
6246 6180 BR : S3;
6247 6181 %}
6248 6182
6249 6183 // Conditional branch
6250 6184 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6251 6185 single_instruction;
6252 6186 cr : S1(read);
6253 6187 BR : S3;
6254 6188 %}
6255 6189
6256 6190 // Allocation idiom
6257 6191 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6258 6192 instruction_count(1); force_serialization;
6259 6193 fixed_latency(6);
6260 6194 heap_ptr : S3(read);
6261 6195 DECODE : S0(3);
6262 6196 D0 : S2;
6263 6197 MEM : S3;
6264 6198 ALU : S3(2);
6265 6199 dst : S5(write);
6266 6200 BR : S5;
6267 6201 %}
6268 6202
6269 6203 // Generic big/slow expanded idiom
6270 6204 pipe_class pipe_slow( ) %{
6271 6205 instruction_count(10); multiple_bundles; force_serialization;
6272 6206 fixed_latency(100);
6273 6207 D0 : S0(2);
6274 6208 MEM : S3(2);
6275 6209 %}
6276 6210
6277 6211 // The real do-nothing guy
6278 6212 pipe_class empty( ) %{
6279 6213 instruction_count(0);
6280 6214 %}
6281 6215
6282 6216 // Define the class for the Nop node
6283 6217 define %{
6284 6218 MachNop = empty;
6285 6219 %}
6286 6220
6287 6221 %}
6288 6222
6289 6223 //----------INSTRUCTIONS-------------------------------------------------------
6290 6224 //
6291 6225 // match -- States which machine-independent subtree may be replaced
6292 6226 // by this instruction.
6293 6227 // ins_cost -- The estimated cost of this instruction is used by instruction
6294 6228 // selection to identify a minimum cost tree of machine
6295 6229 // instructions that matches a tree of machine-independent
6296 6230 // instructions.
6297 6231 // format -- A string providing the disassembly for this instruction.
6298 6232 // The value of an instruction's operand may be inserted
6299 6233 // by referring to it with a '$' prefix.
6300 6234 // opcode -- Three instruction opcodes may be provided. These are referred
6301 6235 // to within an encode class as $primary, $secondary, and $tertiary
6302 6236 // respectively. The primary opcode is commonly used to
6303 6237 // indicate the type of machine instruction, while secondary
6304 6238 // and tertiary are often used for prefix options or addressing
6305 6239 // modes.
6306 6240 // ins_encode -- A list of encode classes with parameters. The encode class
6307 6241 // name must have been defined in an 'enc_class' specification
6308 6242 // in the encode section of the architecture description.
6309 6243
6310 6244 //----------BSWAP-Instruction--------------------------------------------------
6311 6245 instruct bytes_reverse_int(eRegI dst) %{
6312 6246 match(Set dst (ReverseBytesI dst));
6313 6247
6314 6248 format %{ "BSWAP $dst" %}
6315 6249 opcode(0x0F, 0xC8);
6316 6250 ins_encode( OpcP, OpcSReg(dst) );
6317 6251 ins_pipe( ialu_reg );
6318 6252 %}
6319 6253
6320 6254 instruct bytes_reverse_long(eRegL dst) %{
6321 6255 match(Set dst (ReverseBytesL dst));
6322 6256
6323 6257 format %{ "BSWAP $dst.lo\n\t"
6324 6258 "BSWAP $dst.hi\n\t"
6325 6259 "XCHG $dst.lo $dst.hi" %}
6326 6260
6327 6261 ins_cost(125);
6328 6262 ins_encode( bswap_long_bytes(dst) );
6329 6263 ins_pipe( ialu_reg_reg);
6330 6264 %}
6331 6265
6332 6266 instruct bytes_reverse_unsigned_short(eRegI dst) %{
6333 6267 match(Set dst (ReverseBytesUS dst));
6334 6268
6335 6269 format %{ "BSWAP $dst\n\t"
6336 6270 "SHR $dst,16\n\t" %}
6337 6271 ins_encode %{
6338 6272 __ bswapl($dst$$Register);
6339 6273 __ shrl($dst$$Register, 16);
6340 6274 %}
6341 6275 ins_pipe( ialu_reg );
6342 6276 %}
6343 6277
6344 6278 instruct bytes_reverse_short(eRegI dst) %{
6345 6279 match(Set dst (ReverseBytesS dst));
6346 6280
6347 6281 format %{ "BSWAP $dst\n\t"
6348 6282 "SAR $dst,16\n\t" %}
6349 6283 ins_encode %{
6350 6284 __ bswapl($dst$$Register);
6351 6285 __ sarl($dst$$Register, 16);
6352 6286 %}
6353 6287 ins_pipe( ialu_reg );
6354 6288 %}
6355 6289
6356 6290
6357 6291 //---------- Zeros Count Instructions ------------------------------------------
6358 6292
6359 6293 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6360 6294 predicate(UseCountLeadingZerosInstruction);
6361 6295 match(Set dst (CountLeadingZerosI src));
6362 6296 effect(KILL cr);
6363 6297
6364 6298 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6365 6299 ins_encode %{
6366 6300 __ lzcntl($dst$$Register, $src$$Register);
6367 6301 %}
6368 6302 ins_pipe(ialu_reg);
6369 6303 %}
6370 6304
6371 6305 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6372 6306 predicate(!UseCountLeadingZerosInstruction);
6373 6307 match(Set dst (CountLeadingZerosI src));
6374 6308 effect(KILL cr);
6375 6309
6376 6310 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6377 6311 "JNZ skip\n\t"
6378 6312 "MOV $dst, -1\n"
6379 6313 "skip:\n\t"
6380 6314 "NEG $dst\n\t"
6381 6315 "ADD $dst, 31" %}
6382 6316 ins_encode %{
6383 6317 Register Rdst = $dst$$Register;
6384 6318 Register Rsrc = $src$$Register;
6385 6319 Label skip;
6386 6320 __ bsrl(Rdst, Rsrc);
6387 6321 __ jccb(Assembler::notZero, skip);
6388 6322 __ movl(Rdst, -1);
6389 6323 __ bind(skip);
6390 6324 __ negl(Rdst);
6391 6325 __ addl(Rdst, BitsPerInt - 1);
6392 6326 %}
6393 6327 ins_pipe(ialu_reg);
6394 6328 %}
6395 6329
6396 6330 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6397 6331 predicate(UseCountLeadingZerosInstruction);
6398 6332 match(Set dst (CountLeadingZerosL src));
6399 6333 effect(TEMP dst, KILL cr);
6400 6334
6401 6335 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6402 6336 "JNC done\n\t"
6403 6337 "LZCNT $dst, $src.lo\n\t"
6404 6338 "ADD $dst, 32\n"
6405 6339 "done:" %}
6406 6340 ins_encode %{
6407 6341 Register Rdst = $dst$$Register;
6408 6342 Register Rsrc = $src$$Register;
6409 6343 Label done;
6410 6344 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6411 6345 __ jccb(Assembler::carryClear, done);
6412 6346 __ lzcntl(Rdst, Rsrc);
6413 6347 __ addl(Rdst, BitsPerInt);
6414 6348 __ bind(done);
6415 6349 %}
6416 6350 ins_pipe(ialu_reg);
6417 6351 %}
6418 6352
6419 6353 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6420 6354 predicate(!UseCountLeadingZerosInstruction);
6421 6355 match(Set dst (CountLeadingZerosL src));
6422 6356 effect(TEMP dst, KILL cr);
6423 6357
6424 6358 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6425 6359 "JZ msw_is_zero\n\t"
6426 6360 "ADD $dst, 32\n\t"
6427 6361 "JMP not_zero\n"
6428 6362 "msw_is_zero:\n\t"
6429 6363 "BSR $dst, $src.lo\n\t"
6430 6364 "JNZ not_zero\n\t"
6431 6365 "MOV $dst, -1\n"
6432 6366 "not_zero:\n\t"
6433 6367 "NEG $dst\n\t"
6434 6368 "ADD $dst, 63\n" %}
6435 6369 ins_encode %{
6436 6370 Register Rdst = $dst$$Register;
6437 6371 Register Rsrc = $src$$Register;
6438 6372 Label msw_is_zero;
6439 6373 Label not_zero;
6440 6374 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6441 6375 __ jccb(Assembler::zero, msw_is_zero);
6442 6376 __ addl(Rdst, BitsPerInt);
6443 6377 __ jmpb(not_zero);
6444 6378 __ bind(msw_is_zero);
6445 6379 __ bsrl(Rdst, Rsrc);
6446 6380 __ jccb(Assembler::notZero, not_zero);
6447 6381 __ movl(Rdst, -1);
6448 6382 __ bind(not_zero);
6449 6383 __ negl(Rdst);
6450 6384 __ addl(Rdst, BitsPerLong - 1);
6451 6385 %}
6452 6386 ins_pipe(ialu_reg);
6453 6387 %}
6454 6388
6455 6389 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6456 6390 match(Set dst (CountTrailingZerosI src));
6457 6391 effect(KILL cr);
6458 6392
6459 6393 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6460 6394 "JNZ done\n\t"
6461 6395 "MOV $dst, 32\n"
6462 6396 "done:" %}
6463 6397 ins_encode %{
6464 6398 Register Rdst = $dst$$Register;
6465 6399 Label done;
6466 6400 __ bsfl(Rdst, $src$$Register);
6467 6401 __ jccb(Assembler::notZero, done);
6468 6402 __ movl(Rdst, BitsPerInt);
6469 6403 __ bind(done);
6470 6404 %}
6471 6405 ins_pipe(ialu_reg);
6472 6406 %}
6473 6407
6474 6408 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6475 6409 match(Set dst (CountTrailingZerosL src));
6476 6410 effect(TEMP dst, KILL cr);
6477 6411
6478 6412 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6479 6413 "JNZ done\n\t"
6480 6414 "BSF $dst, $src.hi\n\t"
6481 6415 "JNZ msw_not_zero\n\t"
6482 6416 "MOV $dst, 32\n"
6483 6417 "msw_not_zero:\n\t"
6484 6418 "ADD $dst, 32\n"
6485 6419 "done:" %}
6486 6420 ins_encode %{
6487 6421 Register Rdst = $dst$$Register;
6488 6422 Register Rsrc = $src$$Register;
6489 6423 Label msw_not_zero;
6490 6424 Label done;
6491 6425 __ bsfl(Rdst, Rsrc);
6492 6426 __ jccb(Assembler::notZero, done);
6493 6427 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6494 6428 __ jccb(Assembler::notZero, msw_not_zero);
6495 6429 __ movl(Rdst, BitsPerInt);
6496 6430 __ bind(msw_not_zero);
6497 6431 __ addl(Rdst, BitsPerInt);
6498 6432 __ bind(done);
6499 6433 %}
6500 6434 ins_pipe(ialu_reg);
6501 6435 %}
6502 6436
6503 6437
6504 6438 //---------- Population Count Instructions -------------------------------------
6505 6439
6506 6440 instruct popCountI(eRegI dst, eRegI src) %{
6507 6441 predicate(UsePopCountInstruction);
6508 6442 match(Set dst (PopCountI src));
6509 6443
6510 6444 format %{ "POPCNT $dst, $src" %}
6511 6445 ins_encode %{
6512 6446 __ popcntl($dst$$Register, $src$$Register);
6513 6447 %}
6514 6448 ins_pipe(ialu_reg);
6515 6449 %}
6516 6450
6517 6451 instruct popCountI_mem(eRegI dst, memory mem) %{
6518 6452 predicate(UsePopCountInstruction);
6519 6453 match(Set dst (PopCountI (LoadI mem)));
6520 6454
6521 6455 format %{ "POPCNT $dst, $mem" %}
6522 6456 ins_encode %{
6523 6457 __ popcntl($dst$$Register, $mem$$Address);
6524 6458 %}
6525 6459 ins_pipe(ialu_reg);
6526 6460 %}
6527 6461
6528 6462 // Note: Long.bitCount(long) returns an int.
6529 6463 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6530 6464 predicate(UsePopCountInstruction);
6531 6465 match(Set dst (PopCountL src));
6532 6466 effect(KILL cr, TEMP tmp, TEMP dst);
6533 6467
6534 6468 format %{ "POPCNT $dst, $src.lo\n\t"
6535 6469 "POPCNT $tmp, $src.hi\n\t"
6536 6470 "ADD $dst, $tmp" %}
6537 6471 ins_encode %{
6538 6472 __ popcntl($dst$$Register, $src$$Register);
6539 6473 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6540 6474 __ addl($dst$$Register, $tmp$$Register);
6541 6475 %}
6542 6476 ins_pipe(ialu_reg);
6543 6477 %}
6544 6478
6545 6479 // Note: Long.bitCount(long) returns an int.
6546 6480 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6547 6481 predicate(UsePopCountInstruction);
6548 6482 match(Set dst (PopCountL (LoadL mem)));
6549 6483 effect(KILL cr, TEMP tmp, TEMP dst);
6550 6484
6551 6485 format %{ "POPCNT $dst, $mem\n\t"
6552 6486 "POPCNT $tmp, $mem+4\n\t"
6553 6487 "ADD $dst, $tmp" %}
6554 6488 ins_encode %{
6555 6489 //__ popcntl($dst$$Register, $mem$$Address$$first);
6556 6490 //__ popcntl($tmp$$Register, $mem$$Address$$second);
6557 6491 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6558 6492 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6559 6493 __ addl($dst$$Register, $tmp$$Register);
6560 6494 %}
6561 6495 ins_pipe(ialu_reg);
6562 6496 %}
6563 6497
6564 6498
6565 6499 //----------Load/Store/Move Instructions---------------------------------------
6566 6500 //----------Load Instructions--------------------------------------------------
6567 6501 // Load Byte (8bit signed)
6568 6502 instruct loadB(xRegI dst, memory mem) %{
6569 6503 match(Set dst (LoadB mem));
6570 6504
6571 6505 ins_cost(125);
6572 6506 format %{ "MOVSX8 $dst,$mem\t# byte" %}
6573 6507
6574 6508 ins_encode %{
6575 6509 __ movsbl($dst$$Register, $mem$$Address);
6576 6510 %}
6577 6511
6578 6512 ins_pipe(ialu_reg_mem);
6579 6513 %}
6580 6514
6581 6515 // Load Byte (8bit signed) into Long Register
6582 6516 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6583 6517 match(Set dst (ConvI2L (LoadB mem)));
6584 6518 effect(KILL cr);
6585 6519
6586 6520 ins_cost(375);
6587 6521 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6588 6522 "MOV $dst.hi,$dst.lo\n\t"
6589 6523 "SAR $dst.hi,7" %}
6590 6524
6591 6525 ins_encode %{
6592 6526 __ movsbl($dst$$Register, $mem$$Address);
6593 6527 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6594 6528 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6595 6529 %}
6596 6530
6597 6531 ins_pipe(ialu_reg_mem);
6598 6532 %}
6599 6533
6600 6534 // Load Unsigned Byte (8bit UNsigned)
6601 6535 instruct loadUB(xRegI dst, memory mem) %{
6602 6536 match(Set dst (LoadUB mem));
6603 6537
6604 6538 ins_cost(125);
6605 6539 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6606 6540
6607 6541 ins_encode %{
6608 6542 __ movzbl($dst$$Register, $mem$$Address);
6609 6543 %}
6610 6544
6611 6545 ins_pipe(ialu_reg_mem);
6612 6546 %}
6613 6547
6614 6548 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6615 6549 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6616 6550 match(Set dst (ConvI2L (LoadUB mem)));
6617 6551 effect(KILL cr);
6618 6552
6619 6553 ins_cost(250);
6620 6554 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6621 6555 "XOR $dst.hi,$dst.hi" %}
6622 6556
6623 6557 ins_encode %{
6624 6558 Register Rdst = $dst$$Register;
6625 6559 __ movzbl(Rdst, $mem$$Address);
6626 6560 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6627 6561 %}
6628 6562
6629 6563 ins_pipe(ialu_reg_mem);
6630 6564 %}
6631 6565
6632 6566 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
6633 6567 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6634 6568 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6635 6569 effect(KILL cr);
6636 6570
6637 6571 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6638 6572 "XOR $dst.hi,$dst.hi\n\t"
6639 6573 "AND $dst.lo,$mask" %}
6640 6574 ins_encode %{
6641 6575 Register Rdst = $dst$$Register;
6642 6576 __ movzbl(Rdst, $mem$$Address);
6643 6577 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6644 6578 __ andl(Rdst, $mask$$constant);
6645 6579 %}
6646 6580 ins_pipe(ialu_reg_mem);
6647 6581 %}
6648 6582
6649 6583 // Load Short (16bit signed)
6650 6584 instruct loadS(eRegI dst, memory mem) %{
6651 6585 match(Set dst (LoadS mem));
6652 6586
6653 6587 ins_cost(125);
6654 6588 format %{ "MOVSX $dst,$mem\t# short" %}
6655 6589
6656 6590 ins_encode %{
6657 6591 __ movswl($dst$$Register, $mem$$Address);
6658 6592 %}
6659 6593
6660 6594 ins_pipe(ialu_reg_mem);
6661 6595 %}
6662 6596
6663 6597 // Load Short (16 bit signed) to Byte (8 bit signed)
6664 6598 instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6665 6599 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6666 6600
6667 6601 ins_cost(125);
6668 6602 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
6669 6603 ins_encode %{
6670 6604 __ movsbl($dst$$Register, $mem$$Address);
6671 6605 %}
6672 6606 ins_pipe(ialu_reg_mem);
6673 6607 %}
6674 6608
6675 6609 // Load Short (16bit signed) into Long Register
6676 6610 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6677 6611 match(Set dst (ConvI2L (LoadS mem)));
6678 6612 effect(KILL cr);
6679 6613
6680 6614 ins_cost(375);
6681 6615 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
6682 6616 "MOV $dst.hi,$dst.lo\n\t"
6683 6617 "SAR $dst.hi,15" %}
6684 6618
6685 6619 ins_encode %{
6686 6620 __ movswl($dst$$Register, $mem$$Address);
6687 6621 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6688 6622 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
6689 6623 %}
6690 6624
6691 6625 ins_pipe(ialu_reg_mem);
6692 6626 %}
6693 6627
6694 6628 // Load Unsigned Short/Char (16bit unsigned)
6695 6629 instruct loadUS(eRegI dst, memory mem) %{
6696 6630 match(Set dst (LoadUS mem));
6697 6631
6698 6632 ins_cost(125);
6699 6633 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
6700 6634
6701 6635 ins_encode %{
6702 6636 __ movzwl($dst$$Register, $mem$$Address);
6703 6637 %}
6704 6638
6705 6639 ins_pipe(ialu_reg_mem);
6706 6640 %}
6707 6641
6708 6642 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6709 6643 instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6710 6644 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6711 6645
6712 6646 ins_cost(125);
6713 6647 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
6714 6648 ins_encode %{
6715 6649 __ movsbl($dst$$Register, $mem$$Address);
6716 6650 %}
6717 6651 ins_pipe(ialu_reg_mem);
6718 6652 %}
6719 6653
6720 6654 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6721 6655 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6722 6656 match(Set dst (ConvI2L (LoadUS mem)));
6723 6657 effect(KILL cr);
6724 6658
6725 6659 ins_cost(250);
6726 6660 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
6727 6661 "XOR $dst.hi,$dst.hi" %}
6728 6662
6729 6663 ins_encode %{
6730 6664 __ movzwl($dst$$Register, $mem$$Address);
6731 6665 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6732 6666 %}
6733 6667
6734 6668 ins_pipe(ialu_reg_mem);
6735 6669 %}
6736 6670
6737 6671 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6738 6672 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6739 6673 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6740 6674 effect(KILL cr);
6741 6675
6742 6676 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6743 6677 "XOR $dst.hi,$dst.hi" %}
6744 6678 ins_encode %{
6745 6679 Register Rdst = $dst$$Register;
6746 6680 __ movzbl(Rdst, $mem$$Address);
6747 6681 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6748 6682 %}
6749 6683 ins_pipe(ialu_reg_mem);
6750 6684 %}
6751 6685
6752 6686 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
6753 6687 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6754 6688 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6755 6689 effect(KILL cr);
6756 6690
6757 6691 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6758 6692 "XOR $dst.hi,$dst.hi\n\t"
6759 6693 "AND $dst.lo,$mask" %}
6760 6694 ins_encode %{
6761 6695 Register Rdst = $dst$$Register;
6762 6696 __ movzwl(Rdst, $mem$$Address);
6763 6697 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6764 6698 __ andl(Rdst, $mask$$constant);
6765 6699 %}
6766 6700 ins_pipe(ialu_reg_mem);
6767 6701 %}
6768 6702
6769 6703 // Load Integer
6770 6704 instruct loadI(eRegI dst, memory mem) %{
6771 6705 match(Set dst (LoadI mem));
6772 6706
6773 6707 ins_cost(125);
6774 6708 format %{ "MOV $dst,$mem\t# int" %}
6775 6709
6776 6710 ins_encode %{
6777 6711 __ movl($dst$$Register, $mem$$Address);
6778 6712 %}
6779 6713
6780 6714 ins_pipe(ialu_reg_mem);
6781 6715 %}
6782 6716
6783 6717 // Load Integer (32 bit signed) to Byte (8 bit signed)
6784 6718 instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6785 6719 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6786 6720
6787 6721 ins_cost(125);
6788 6722 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
6789 6723 ins_encode %{
6790 6724 __ movsbl($dst$$Register, $mem$$Address);
6791 6725 %}
6792 6726 ins_pipe(ialu_reg_mem);
6793 6727 %}
6794 6728
6795 6729 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6796 6730 instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6797 6731 match(Set dst (AndI (LoadI mem) mask));
6798 6732
6799 6733 ins_cost(125);
6800 6734 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
6801 6735 ins_encode %{
6802 6736 __ movzbl($dst$$Register, $mem$$Address);
6803 6737 %}
6804 6738 ins_pipe(ialu_reg_mem);
6805 6739 %}
6806 6740
6807 6741 // Load Integer (32 bit signed) to Short (16 bit signed)
6808 6742 instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6809 6743 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6810 6744
6811 6745 ins_cost(125);
6812 6746 format %{ "MOVSX $dst, $mem\t# int -> short" %}
6813 6747 ins_encode %{
6814 6748 __ movswl($dst$$Register, $mem$$Address);
6815 6749 %}
6816 6750 ins_pipe(ialu_reg_mem);
6817 6751 %}
6818 6752
6819 6753 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6820 6754 instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6821 6755 match(Set dst (AndI (LoadI mem) mask));
6822 6756
6823 6757 ins_cost(125);
6824 6758 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
6825 6759 ins_encode %{
6826 6760 __ movzwl($dst$$Register, $mem$$Address);
6827 6761 %}
6828 6762 ins_pipe(ialu_reg_mem);
6829 6763 %}
6830 6764
6831 6765 // Load Integer into Long Register
6832 6766 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6833 6767 match(Set dst (ConvI2L (LoadI mem)));
6834 6768 effect(KILL cr);
6835 6769
6836 6770 ins_cost(375);
6837 6771 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
6838 6772 "MOV $dst.hi,$dst.lo\n\t"
6839 6773 "SAR $dst.hi,31" %}
6840 6774
6841 6775 ins_encode %{
6842 6776 __ movl($dst$$Register, $mem$$Address);
6843 6777 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6844 6778 __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
6845 6779 %}
6846 6780
6847 6781 ins_pipe(ialu_reg_mem);
6848 6782 %}
6849 6783
6850 6784 // Load Integer with mask 0xFF into Long Register
6851 6785 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6852 6786 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6853 6787 effect(KILL cr);
6854 6788
6855 6789 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6856 6790 "XOR $dst.hi,$dst.hi" %}
6857 6791 ins_encode %{
6858 6792 Register Rdst = $dst$$Register;
6859 6793 __ movzbl(Rdst, $mem$$Address);
6860 6794 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6861 6795 %}
6862 6796 ins_pipe(ialu_reg_mem);
6863 6797 %}
6864 6798
6865 6799 // Load Integer with mask 0xFFFF into Long Register
6866 6800 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6867 6801 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6868 6802 effect(KILL cr);
6869 6803
6870 6804 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6871 6805 "XOR $dst.hi,$dst.hi" %}
6872 6806 ins_encode %{
6873 6807 Register Rdst = $dst$$Register;
6874 6808 __ movzwl(Rdst, $mem$$Address);
6875 6809 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6876 6810 %}
6877 6811 ins_pipe(ialu_reg_mem);
6878 6812 %}
6879 6813
6880 6814 // Load Integer with 32-bit mask into Long Register
6881 6815 instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6882 6816 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6883 6817 effect(KILL cr);
6884 6818
6885 6819 format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6886 6820 "XOR $dst.hi,$dst.hi\n\t"
6887 6821 "AND $dst.lo,$mask" %}
6888 6822 ins_encode %{
6889 6823 Register Rdst = $dst$$Register;
6890 6824 __ movl(Rdst, $mem$$Address);
6891 6825 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6892 6826 __ andl(Rdst, $mask$$constant);
6893 6827 %}
6894 6828 ins_pipe(ialu_reg_mem);
6895 6829 %}
6896 6830
6897 6831 // Load Unsigned Integer into Long Register
6898 6832 instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6899 6833 match(Set dst (LoadUI2L mem));
6900 6834 effect(KILL cr);
6901 6835
6902 6836 ins_cost(250);
6903 6837 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
6904 6838 "XOR $dst.hi,$dst.hi" %}
6905 6839
6906 6840 ins_encode %{
6907 6841 __ movl($dst$$Register, $mem$$Address);
6908 6842 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6909 6843 %}
6910 6844
6911 6845 ins_pipe(ialu_reg_mem);
6912 6846 %}
6913 6847
6914 6848 // Load Long. Cannot clobber address while loading, so restrict address
6915 6849 // register to ESI
6916 6850 instruct loadL(eRegL dst, load_long_memory mem) %{
6917 6851 predicate(!((LoadLNode*)n)->require_atomic_access());
6918 6852 match(Set dst (LoadL mem));
6919 6853
6920 6854 ins_cost(250);
6921 6855 format %{ "MOV $dst.lo,$mem\t# long\n\t"
6922 6856 "MOV $dst.hi,$mem+4" %}
6923 6857
6924 6858 ins_encode %{
6925 6859 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
6926 6860 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
6927 6861 __ movl($dst$$Register, Amemlo);
6928 6862 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6929 6863 %}
6930 6864
6931 6865 ins_pipe(ialu_reg_long_mem);
6932 6866 %}
6933 6867
6934 6868 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6935 6869 // then store it down to the stack and reload on the int
6936 6870 // side.
6937 6871 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6938 6872 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6939 6873 match(Set dst (LoadL mem));
6940 6874
6941 6875 ins_cost(200);
6942 6876 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6943 6877 "FISTp $dst" %}
6944 6878 ins_encode(enc_loadL_volatile(mem,dst));
6945 6879 ins_pipe( fpu_reg_mem );
6946 6880 %}
6947 6881
6948 6882 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6949 6883 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6950 6884 match(Set dst (LoadL mem));
6951 6885 effect(TEMP tmp);
6952 6886 ins_cost(180);
6953 6887 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6954 6888 "MOVSD $dst,$tmp" %}
6955 6889 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
6956 6890 ins_pipe( pipe_slow );
6957 6891 %}
6958 6892
6959 6893 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6960 6894 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6961 6895 match(Set dst (LoadL mem));
6962 6896 effect(TEMP tmp);
6963 6897 ins_cost(160);
6964 6898 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6965 6899 "MOVD $dst.lo,$tmp\n\t"
6966 6900 "PSRLQ $tmp,32\n\t"
6967 6901 "MOVD $dst.hi,$tmp" %}
6968 6902 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
6969 6903 ins_pipe( pipe_slow );
6970 6904 %}
6971 6905
6972 6906 // Load Range
6973 6907 instruct loadRange(eRegI dst, memory mem) %{
6974 6908 match(Set dst (LoadRange mem));
6975 6909
6976 6910 ins_cost(125);
6977 6911 format %{ "MOV $dst,$mem" %}
6978 6912 opcode(0x8B);
6979 6913 ins_encode( OpcP, RegMem(dst,mem));
6980 6914 ins_pipe( ialu_reg_mem );
6981 6915 %}
6982 6916
6983 6917
6984 6918 // Load Pointer
6985 6919 instruct loadP(eRegP dst, memory mem) %{
6986 6920 match(Set dst (LoadP mem));
6987 6921
6988 6922 ins_cost(125);
6989 6923 format %{ "MOV $dst,$mem" %}
6990 6924 opcode(0x8B);
6991 6925 ins_encode( OpcP, RegMem(dst,mem));
6992 6926 ins_pipe( ialu_reg_mem );
6993 6927 %}
6994 6928
6995 6929 // Load Klass Pointer
6996 6930 instruct loadKlass(eRegP dst, memory mem) %{
6997 6931 match(Set dst (LoadKlass mem));
6998 6932
6999 6933 ins_cost(125);
7000 6934 format %{ "MOV $dst,$mem" %}
7001 6935 opcode(0x8B);
7002 6936 ins_encode( OpcP, RegMem(dst,mem));
7003 6937 ins_pipe( ialu_reg_mem );
7004 6938 %}
7005 6939
7006 6940 // Load Double
7007 6941 instruct loadD(regD dst, memory mem) %{
7008 6942 predicate(UseSSE<=1);
7009 6943 match(Set dst (LoadD mem));
7010 6944
7011 6945 ins_cost(150);
7012 6946 format %{ "FLD_D ST,$mem\n\t"
7013 6947 "FSTP $dst" %}
7014 6948 opcode(0xDD); /* DD /0 */
7015 6949 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7016 6950 Pop_Reg_D(dst) );
7017 6951 ins_pipe( fpu_reg_mem );
7018 6952 %}
7019 6953
7020 6954 // Load Double to XMM
7021 6955 instruct loadXD(regXD dst, memory mem) %{
7022 6956 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
7023 6957 match(Set dst (LoadD mem));
7024 6958 ins_cost(145);
7025 6959 format %{ "MOVSD $dst,$mem" %}
7026 6960 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7027 6961 ins_pipe( pipe_slow );
7028 6962 %}
7029 6963
7030 6964 instruct loadXD_partial(regXD dst, memory mem) %{
7031 6965 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
7032 6966 match(Set dst (LoadD mem));
7033 6967 ins_cost(145);
7034 6968 format %{ "MOVLPD $dst,$mem" %}
7035 6969 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
7036 6970 ins_pipe( pipe_slow );
7037 6971 %}
7038 6972
7039 6973 // Load to XMM register (single-precision floating point)
7040 6974 // MOVSS instruction
7041 6975 instruct loadX(regX dst, memory mem) %{
7042 6976 predicate(UseSSE>=1);
7043 6977 match(Set dst (LoadF mem));
7044 6978 ins_cost(145);
7045 6979 format %{ "MOVSS $dst,$mem" %}
7046 6980 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7047 6981 ins_pipe( pipe_slow );
7048 6982 %}
7049 6983
7050 6984 // Load Float
7051 6985 instruct loadF(regF dst, memory mem) %{
7052 6986 predicate(UseSSE==0);
7053 6987 match(Set dst (LoadF mem));
7054 6988
7055 6989 ins_cost(150);
7056 6990 format %{ "FLD_S ST,$mem\n\t"
7057 6991 "FSTP $dst" %}
7058 6992 opcode(0xD9); /* D9 /0 */
7059 6993 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7060 6994 Pop_Reg_F(dst) );
7061 6995 ins_pipe( fpu_reg_mem );
7062 6996 %}
7063 6997
7064 6998 // Load Aligned Packed Byte to XMM register
7065 6999 instruct loadA8B(regXD dst, memory mem) %{
7066 7000 predicate(UseSSE>=1);
7067 7001 match(Set dst (Load8B mem));
7068 7002 ins_cost(125);
7069 7003 format %{ "MOVQ $dst,$mem\t! packed8B" %}
7070 7004 ins_encode( movq_ld(dst, mem));
7071 7005 ins_pipe( pipe_slow );
7072 7006 %}
7073 7007
7074 7008 // Load Aligned Packed Short to XMM register
7075 7009 instruct loadA4S(regXD dst, memory mem) %{
7076 7010 predicate(UseSSE>=1);
7077 7011 match(Set dst (Load4S mem));
7078 7012 ins_cost(125);
7079 7013 format %{ "MOVQ $dst,$mem\t! packed4S" %}
7080 7014 ins_encode( movq_ld(dst, mem));
7081 7015 ins_pipe( pipe_slow );
7082 7016 %}
7083 7017
7084 7018 // Load Aligned Packed Char to XMM register
7085 7019 instruct loadA4C(regXD dst, memory mem) %{
7086 7020 predicate(UseSSE>=1);
7087 7021 match(Set dst (Load4C mem));
7088 7022 ins_cost(125);
7089 7023 format %{ "MOVQ $dst,$mem\t! packed4C" %}
7090 7024 ins_encode( movq_ld(dst, mem));
7091 7025 ins_pipe( pipe_slow );
7092 7026 %}
7093 7027
7094 7028 // Load Aligned Packed Integer to XMM register
7095 7029 instruct load2IU(regXD dst, memory mem) %{
7096 7030 predicate(UseSSE>=1);
7097 7031 match(Set dst (Load2I mem));
7098 7032 ins_cost(125);
7099 7033 format %{ "MOVQ $dst,$mem\t! packed2I" %}
7100 7034 ins_encode( movq_ld(dst, mem));
7101 7035 ins_pipe( pipe_slow );
7102 7036 %}
7103 7037
7104 7038 // Load Aligned Packed Single to XMM
7105 7039 instruct loadA2F(regXD dst, memory mem) %{
7106 7040 predicate(UseSSE>=1);
7107 7041 match(Set dst (Load2F mem));
7108 7042 ins_cost(145);
7109 7043 format %{ "MOVQ $dst,$mem\t! packed2F" %}
7110 7044 ins_encode( movq_ld(dst, mem));
7111 7045 ins_pipe( pipe_slow );
7112 7046 %}
7113 7047
7114 7048 // Load Effective Address
7115 7049 instruct leaP8(eRegP dst, indOffset8 mem) %{
7116 7050 match(Set dst mem);
7117 7051
7118 7052 ins_cost(110);
7119 7053 format %{ "LEA $dst,$mem" %}
7120 7054 opcode(0x8D);
7121 7055 ins_encode( OpcP, RegMem(dst,mem));
7122 7056 ins_pipe( ialu_reg_reg_fat );
7123 7057 %}
7124 7058
7125 7059 instruct leaP32(eRegP dst, indOffset32 mem) %{
7126 7060 match(Set dst mem);
7127 7061
7128 7062 ins_cost(110);
7129 7063 format %{ "LEA $dst,$mem" %}
7130 7064 opcode(0x8D);
7131 7065 ins_encode( OpcP, RegMem(dst,mem));
7132 7066 ins_pipe( ialu_reg_reg_fat );
7133 7067 %}
7134 7068
7135 7069 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
7136 7070 match(Set dst mem);
7137 7071
7138 7072 ins_cost(110);
7139 7073 format %{ "LEA $dst,$mem" %}
7140 7074 opcode(0x8D);
7141 7075 ins_encode( OpcP, RegMem(dst,mem));
7142 7076 ins_pipe( ialu_reg_reg_fat );
7143 7077 %}
7144 7078
7145 7079 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
7146 7080 match(Set dst mem);
7147 7081
7148 7082 ins_cost(110);
7149 7083 format %{ "LEA $dst,$mem" %}
7150 7084 opcode(0x8D);
7151 7085 ins_encode( OpcP, RegMem(dst,mem));
7152 7086 ins_pipe( ialu_reg_reg_fat );
7153 7087 %}
7154 7088
7155 7089 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
7156 7090 match(Set dst mem);
7157 7091
7158 7092 ins_cost(110);
7159 7093 format %{ "LEA $dst,$mem" %}
7160 7094 opcode(0x8D);
7161 7095 ins_encode( OpcP, RegMem(dst,mem));
7162 7096 ins_pipe( ialu_reg_reg_fat );
7163 7097 %}
7164 7098
7165 7099 // Load Constant
7166 7100 instruct loadConI(eRegI dst, immI src) %{
7167 7101 match(Set dst src);
7168 7102
7169 7103 format %{ "MOV $dst,$src" %}
7170 7104 ins_encode( LdImmI(dst, src) );
7171 7105 ins_pipe( ialu_reg_fat );
7172 7106 %}
7173 7107
7174 7108 // Load Constant zero
7175 7109 instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
7176 7110 match(Set dst src);
7177 7111 effect(KILL cr);
7178 7112
7179 7113 ins_cost(50);
7180 7114 format %{ "XOR $dst,$dst" %}
7181 7115 opcode(0x33); /* + rd */
7182 7116 ins_encode( OpcP, RegReg( dst, dst ) );
7183 7117 ins_pipe( ialu_reg );
7184 7118 %}
7185 7119
7186 7120 instruct loadConP(eRegP dst, immP src) %{
7187 7121 match(Set dst src);
7188 7122
7189 7123 format %{ "MOV $dst,$src" %}
7190 7124 opcode(0xB8); /* + rd */
7191 7125 ins_encode( LdImmP(dst, src) );
7192 7126 ins_pipe( ialu_reg_fat );
7193 7127 %}
7194 7128
7195 7129 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
7196 7130 match(Set dst src);
7197 7131 effect(KILL cr);
7198 7132 ins_cost(200);
7199 7133 format %{ "MOV $dst.lo,$src.lo\n\t"
7200 7134 "MOV $dst.hi,$src.hi" %}
7201 7135 opcode(0xB8);
7202 7136 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
7203 7137 ins_pipe( ialu_reg_long_fat );
7204 7138 %}
7205 7139
7206 7140 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
7207 7141 match(Set dst src);
↓ open down ↓ |
2350 lines elided |
↑ open up ↑ |
7208 7142 effect(KILL cr);
7209 7143 ins_cost(150);
7210 7144 format %{ "XOR $dst.lo,$dst.lo\n\t"
7211 7145 "XOR $dst.hi,$dst.hi" %}
7212 7146 opcode(0x33,0x33);
7213 7147 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7214 7148 ins_pipe( ialu_reg_long );
7215 7149 %}
7216 7150
7217 7151 // The instruction usage is guarded by predicate in operand immF().
7218 -instruct loadConF(regF dst, immF src) %{
7219 - match(Set dst src);
7152 +instruct loadConF(regF dst, immF con) %{
7153 + match(Set dst con);
7220 7154 ins_cost(125);
7155 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
7156 + "FSTP $dst" %}
7157 + ins_encode %{
7158 + __ fld_s($constantaddress($con));
7159 + __ fstp_d($dst$$reg);
7160 + %}
7161 + ins_pipe(fpu_reg_con);
7162 +%}
7221 7163
7222 - format %{ "FLD_S ST,$src\n\t"
7164 +// The instruction usage is guarded by predicate in operand immF0().
7165 +instruct loadConF0(regF dst, immF0 con) %{
7166 + match(Set dst con);
7167 + ins_cost(125);
7168 + format %{ "FLDZ ST\n\t"
7223 7169 "FSTP $dst" %}
7224 - opcode(0xD9, 0x00); /* D9 /0 */
7225 - ins_encode(LdImmF(src), Pop_Reg_F(dst) );
7226 - ins_pipe( fpu_reg_con );
7170 + ins_encode %{
7171 + __ fldz();
7172 + __ fstp_d($dst$$reg);
7173 + %}
7174 + ins_pipe(fpu_reg_con);
7175 +%}
7176 +
7177 +// The instruction usage is guarded by predicate in operand immF1().
7178 +instruct loadConF1(regF dst, immF1 con) %{
7179 + match(Set dst con);
7180 + ins_cost(125);
7181 + format %{ "FLD1 ST\n\t"
7182 + "FSTP $dst" %}
7183 + ins_encode %{
7184 + __ fld1();
7185 + __ fstp_d($dst$$reg);
7186 + %}
7187 + ins_pipe(fpu_reg_con);
7227 7188 %}
7228 7189
7229 7190 // The instruction usage is guarded by predicate in operand immXF().
7230 7191 instruct loadConX(regX dst, immXF con) %{
7231 7192 match(Set dst con);
7232 7193 ins_cost(125);
7233 - format %{ "MOVSS $dst,[$con]" %}
7234 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
7235 - ins_pipe( pipe_slow );
7194 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7195 + ins_encode %{
7196 + __ movflt($dst$$XMMRegister, $constantaddress($con));
7197 + %}
7198 + ins_pipe(pipe_slow);
7236 7199 %}
7237 7200
7238 7201 // The instruction usage is guarded by predicate in operand immXF0().
7239 7202 instruct loadConX0(regX dst, immXF0 src) %{
7240 7203 match(Set dst src);
7241 7204 ins_cost(100);
7242 7205 format %{ "XORPS $dst,$dst\t# float 0.0" %}
7243 - ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7244 - ins_pipe( pipe_slow );
7206 + ins_encode %{
7207 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7208 + %}
7209 + ins_pipe(pipe_slow);
7245 7210 %}
7246 7211
7247 7212 // The instruction usage is guarded by predicate in operand immD().
7248 -instruct loadConD(regD dst, immD src) %{
7249 - match(Set dst src);
7213 +instruct loadConD(regD dst, immD con) %{
7214 + match(Set dst con);
7215 + ins_cost(125);
7216 +
7217 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
7218 + "FSTP $dst" %}
7219 + ins_encode %{
7220 + __ fld_d($constantaddress($con));
7221 + __ fstp_d($dst$$reg);
7222 + %}
7223 + ins_pipe(fpu_reg_con);
7224 +%}
7225 +
7226 +// The instruction usage is guarded by predicate in operand immD0().
7227 +instruct loadConD0(regD dst, immD0 con) %{
7228 + match(Set dst con);
7229 + ins_cost(125);
7230 +
7231 + format %{ "FLDZ ST\n\t"
7232 + "FSTP $dst" %}
7233 + ins_encode %{
7234 + __ fldz();
7235 + __ fstp_d($dst$$reg);
7236 + %}
7237 + ins_pipe(fpu_reg_con);
7238 +%}
7239 +
7240 +// The instruction usage is guarded by predicate in operand immD1().
7241 +instruct loadConD1(regD dst, immD1 con) %{
7242 + match(Set dst con);
7250 7243 ins_cost(125);
7251 7244
7252 - format %{ "FLD_D ST,$src\n\t"
7245 + format %{ "FLD1 ST\n\t"
7253 7246 "FSTP $dst" %}
7254 - ins_encode(LdImmD(src), Pop_Reg_D(dst) );
7255 - ins_pipe( fpu_reg_con );
7247 + ins_encode %{
7248 + __ fld1();
7249 + __ fstp_d($dst$$reg);
7250 + %}
7251 + ins_pipe(fpu_reg_con);
7256 7252 %}
7257 7253
7258 7254 // The instruction usage is guarded by predicate in operand immXD().
7259 7255 instruct loadConXD(regXD dst, immXD con) %{
7260 7256 match(Set dst con);
7261 7257 ins_cost(125);
7262 - format %{ "MOVSD $dst,[$con]" %}
7263 - ins_encode(load_conXD(dst, con));
7264 - ins_pipe( pipe_slow );
// NOTE(review): SSE2 path now loads the double from the nmethod constant
// table via $constantaddress (movdbl == MOVSD reg, mem) instead of the old
// load_conXD helper encoding.
7258 + format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
7259 + ins_encode %{
7260 + __ movdbl($dst$$XMMRegister, $constantaddress($con));
7261 + %}
7262 + ins_pipe(pipe_slow);
7265 7263 %}
7266 7264
7267 7265 // The instruction usage is guarded by predicate in operand immXD0().
7268 7266 instruct loadConXD0(regXD dst, immXD0 src) %{
7269 7267 match(Set dst src);
7270 7268 ins_cost(100);
7271 7269 format %{ "XORPD $dst,$dst\t# double 0.0" %}
7272 7270 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7273 7271 ins_pipe( pipe_slow );
7274 7272 %}
7275 7273
7276 7274 // Load Stack Slot
7277 7275 instruct loadSSI(eRegI dst, stackSlotI src) %{
7278 7276 match(Set dst src);
7279 7277 ins_cost(125);
7280 7278
7281 7279 format %{ "MOV $dst,$src" %}
7282 7280 opcode(0x8B);
7283 7281 ins_encode( OpcP, RegMem(dst,src));
7284 7282 ins_pipe( ialu_reg_mem );
7285 7283 %}
7286 7284
7287 7285 instruct loadSSL(eRegL dst, stackSlotL src) %{
7288 7286 match(Set dst src);
7289 7287
7290 7288 ins_cost(200);
7291 7289 format %{ "MOV $dst,$src.lo\n\t"
7292 7290 "MOV $dst+4,$src.hi" %}
7293 7291 opcode(0x8B, 0x8B);
7294 7292 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
7295 7293 ins_pipe( ialu_mem_long_reg );
7296 7294 %}
7297 7295
7298 7296 // Load Stack Slot
7299 7297 instruct loadSSP(eRegP dst, stackSlotP src) %{
7300 7298 match(Set dst src);
7301 7299 ins_cost(125);
7302 7300
7303 7301 format %{ "MOV $dst,$src" %}
7304 7302 opcode(0x8B);
7305 7303 ins_encode( OpcP, RegMem(dst,src));
7306 7304 ins_pipe( ialu_reg_mem );
7307 7305 %}
7308 7306
7309 7307 // Load Stack Slot
7310 7308 instruct loadSSF(regF dst, stackSlotF src) %{
7311 7309 match(Set dst src);
7312 7310 ins_cost(125);
7313 7311
7314 7312 format %{ "FLD_S $src\n\t"
7315 7313 "FSTP $dst" %}
7316 7314 opcode(0xD9); /* D9 /0, FLD m32real */
7317 7315 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7318 7316 Pop_Reg_F(dst) );
7319 7317 ins_pipe( fpu_reg_mem );
7320 7318 %}
7321 7319
7322 7320 // Load Stack Slot
7323 7321 instruct loadSSD(regD dst, stackSlotD src) %{
7324 7322 match(Set dst src);
7325 7323 ins_cost(125);
7326 7324
7327 7325 format %{ "FLD_D $src\n\t"
7328 7326 "FSTP $dst" %}
7329 7327 opcode(0xDD); /* DD /0, FLD m64real */
7330 7328 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7331 7329 Pop_Reg_D(dst) );
7332 7330 ins_pipe( fpu_reg_mem );
7333 7331 %}
7334 7332
7335 7333 // Prefetch instructions.
7336 7334 // Must be safe to execute with invalid address (cannot fault).
7337 7335
7338 7336 instruct prefetchr0( memory mem ) %{
7339 7337 predicate(UseSSE==0 && !VM_Version::supports_3dnow());
7340 7338 match(PrefetchRead mem);
7341 7339 ins_cost(0);
7342 7340 size(0);
7343 7341 format %{ "PREFETCHR (non-SSE is empty encoding)" %}
7344 7342 ins_encode();
7345 7343 ins_pipe(empty);
7346 7344 %}
7347 7345
7348 7346 instruct prefetchr( memory mem ) %{
// NOTE(review): predicate relies on && binding tighter than ||, i.e.
// (UseSSE==0 && supports_3dnow()) || ReadPrefetchInstr==3 — presumably
// intentional (3DNow! PREFETCH, or explicitly selected via flag), but
// parentheses would make it unambiguous. Unchanged context in this rev.
7349 7347 predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
7350 7348 match(PrefetchRead mem);
7351 7349 ins_cost(100);
7352 7350
7353 7351 format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
7354 7352 opcode(0x0F, 0x0d); /* Opcode 0F 0d /0 */
7355 7353 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7356 7354 ins_pipe(ialu_mem);
7357 7355 %}
7358 7356
7359 7357 instruct prefetchrNTA( memory mem ) %{
7360 7358 predicate(UseSSE>=1 && ReadPrefetchInstr==0);
7361 7359 match(PrefetchRead mem);
7362 7360 ins_cost(100);
7363 7361
7364 7362 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
7365 7363 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
7366 7364 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7367 7365 ins_pipe(ialu_mem);
7368 7366 %}
7369 7367
7370 7368 instruct prefetchrT0( memory mem ) %{
7371 7369 predicate(UseSSE>=1 && ReadPrefetchInstr==1);
7372 7370 match(PrefetchRead mem);
7373 7371 ins_cost(100);
7374 7372
7375 7373 format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
7376 7374 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7377 7375 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7378 7376 ins_pipe(ialu_mem);
7379 7377 %}
7380 7378
7381 7379 instruct prefetchrT2( memory mem ) %{
7382 7380 predicate(UseSSE>=1 && ReadPrefetchInstr==2);
7383 7381 match(PrefetchRead mem);
7384 7382 ins_cost(100);
7385 7383
7386 7384 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
7387 7385 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7388 7386 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7389 7387 ins_pipe(ialu_mem);
7390 7388 %}
7391 7389
7392 7390 instruct prefetchw0( memory mem ) %{
7393 7391 predicate(UseSSE==0 && !VM_Version::supports_3dnow());
7394 7392 match(PrefetchWrite mem);
7395 7393 ins_cost(0);
7396 7394 size(0);
7397 7395 format %{ "Prefetch (non-SSE is empty encoding)" %}
7398 7396 ins_encode();
7399 7397 ins_pipe(empty);
7400 7398 %}
7401 7399
7402 7400 instruct prefetchw( memory mem ) %{
// NOTE(review): same implicit-precedence pattern as prefetchr —
// (UseSSE==0 && supports_3dnow()) || AllocatePrefetchInstr==3; presumably
// intentional, but worth parenthesizing. Unchanged context in this rev.
7403 7401 predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
7404 7402 match( PrefetchWrite mem );
7405 7403 ins_cost(100);
7406 7404
7407 7405 format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
7408 7406 opcode(0x0F, 0x0D); /* Opcode 0F 0D /1 */
7409 7407 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7410 7408 ins_pipe(ialu_mem);
7411 7409 %}
7412 7410
7413 7411 instruct prefetchwNTA( memory mem ) %{
7414 7412 predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
7415 7413 match(PrefetchWrite mem);
7416 7414 ins_cost(100);
7417 7415
7418 7416 format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
7419 7417 opcode(0x0F, 0x18); /* Opcode 0F 18 /0 */
7420 7418 ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7421 7419 ins_pipe(ialu_mem);
7422 7420 %}
7423 7421
7424 7422 instruct prefetchwT0( memory mem ) %{
7425 7423 predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
7426 7424 match(PrefetchWrite mem);
7427 7425 ins_cost(100);
7428 7426
7429 7427 format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
7430 7428 opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
7431 7429 ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7432 7430 ins_pipe(ialu_mem);
7433 7431 %}
7434 7432
7435 7433 instruct prefetchwT2( memory mem ) %{
7436 7434 predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
7437 7435 match(PrefetchWrite mem);
7438 7436 ins_cost(100);
7439 7437
7440 7438 format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
7441 7439 opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
7442 7440 ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7443 7441 ins_pipe(ialu_mem);
7444 7442 %}
7445 7443
7446 7444 //----------Store Instructions-------------------------------------------------
7447 7445
7448 7446 // Store Byte
7449 7447 instruct storeB(memory mem, xRegI src) %{
7450 7448 match(Set mem (StoreB mem src));
7451 7449
7452 7450 ins_cost(125);
7453 7451 format %{ "MOV8 $mem,$src" %}
7454 7452 opcode(0x88);
7455 7453 ins_encode( OpcP, RegMem( src, mem ) );
7456 7454 ins_pipe( ialu_mem_reg );
7457 7455 %}
7458 7456
7459 7457 // Store Char/Short
7460 7458 instruct storeC(memory mem, eRegI src) %{
7461 7459 match(Set mem (StoreC mem src));
7462 7460
7463 7461 ins_cost(125);
7464 7462 format %{ "MOV16 $mem,$src" %}
7465 7463 opcode(0x89, 0x66);
7466 7464 ins_encode( OpcS, OpcP, RegMem( src, mem ) );
7467 7465 ins_pipe( ialu_mem_reg );
7468 7466 %}
7469 7467
7470 7468 // Store Integer
7471 7469 instruct storeI(memory mem, eRegI src) %{
7472 7470 match(Set mem (StoreI mem src));
7473 7471
7474 7472 ins_cost(125);
7475 7473 format %{ "MOV $mem,$src" %}
7476 7474 opcode(0x89);
7477 7475 ins_encode( OpcP, RegMem( src, mem ) );
7478 7476 ins_pipe( ialu_mem_reg );
7479 7477 %}
7480 7478
7481 7479 // Store Long
7482 7480 instruct storeL(long_memory mem, eRegL src) %{
7483 7481 predicate(!((StoreLNode*)n)->require_atomic_access());
7484 7482 match(Set mem (StoreL mem src));
7485 7483
7486 7484 ins_cost(200);
7487 7485 format %{ "MOV $mem,$src.lo\n\t"
7488 7486 "MOV $mem+4,$src.hi" %}
7489 7487 opcode(0x89, 0x89);
7490 7488 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
7491 7489 ins_pipe( ialu_mem_long_reg );
7492 7490 %}
7493 7491
7494 7492 // Store Long to Integer
7495 7493 instruct storeL2I(memory mem, eRegL src) %{
7496 7494 match(Set mem (StoreI mem (ConvL2I src)));
7497 7495
7498 7496 format %{ "MOV $mem,$src.lo\t# long -> int" %}
7499 7497 ins_encode %{
7500 7498 __ movl($mem$$Address, $src$$Register);
7501 7499 %}
7502 7500 ins_pipe(ialu_mem_reg);
7503 7501 %}
7504 7502
7505 7503 // Volatile Store Long. Must be atomic, so move it into
7506 7504 // the FP TOS and then do a 64-bit FIST. Has to probe the
7507 7505 // target address before the store (for null-ptr checks)
7508 7506 // so the memory operand is used twice in the encoding.
7509 7507 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7510 7508 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7511 7509 match(Set mem (StoreL mem src));
7512 7510 effect( KILL cr );
7513 7511 ins_cost(400);
7514 7512 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7515 7513 "FILD $src\n\t"
7516 7514 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7517 7515 opcode(0x3B);
7518 7516 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7519 7517 ins_pipe( fpu_reg_mem );
7520 7518 %}
7521 7519
7522 7520 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7523 7521 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7524 7522 match(Set mem (StoreL mem src));
7525 7523 effect( TEMP tmp, KILL cr );
7526 7524 ins_cost(380);
7527 7525 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7528 7526 "MOVSD $tmp,$src\n\t"
7529 7527 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7530 7528 opcode(0x3B);
7531 7529 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
7532 7530 ins_pipe( pipe_slow );
7533 7531 %}
7534 7532
7535 7533 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7536 7534 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7537 7535 match(Set mem (StoreL mem src));
7538 7536 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7539 7537 ins_cost(360);
7540 7538 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7541 7539 "MOVD $tmp,$src.lo\n\t"
7542 7540 "MOVD $tmp2,$src.hi\n\t"
7543 7541 "PUNPCKLDQ $tmp,$tmp2\n\t"
7544 7542 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7545 7543 opcode(0x3B);
7546 7544 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
7547 7545 ins_pipe( pipe_slow );
7548 7546 %}
7549 7547
7550 7548 // Store Pointer; for storing unknown oops and raw pointers
7551 7549 instruct storeP(memory mem, anyRegP src) %{
7552 7550 match(Set mem (StoreP mem src));
7553 7551
7554 7552 ins_cost(125);
7555 7553 format %{ "MOV $mem,$src" %}
7556 7554 opcode(0x89);
7557 7555 ins_encode( OpcP, RegMem( src, mem ) );
7558 7556 ins_pipe( ialu_mem_reg );
7559 7557 %}
7560 7558
7561 7559 // Store Integer Immediate
7562 7560 instruct storeImmI(memory mem, immI src) %{
7563 7561 match(Set mem (StoreI mem src));
7564 7562
7565 7563 ins_cost(150);
7566 7564 format %{ "MOV $mem,$src" %}
7567 7565 opcode(0xC7); /* C7 /0 */
7568 7566 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7569 7567 ins_pipe( ialu_mem_imm );
7570 7568 %}
7571 7569
7572 7570 // Store Short/Char Immediate
7573 7571 instruct storeImmI16(memory mem, immI16 src) %{
7574 7572 predicate(UseStoreImmI16);
7575 7573 match(Set mem (StoreC mem src));
7576 7574
7577 7575 ins_cost(150);
7578 7576 format %{ "MOV16 $mem,$src" %}
7579 7577 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
7580 7578 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
7581 7579 ins_pipe( ialu_mem_imm );
7582 7580 %}
7583 7581
7584 7582 // Store Pointer Immediate; null pointers or constant oops that do not
7585 7583 // need card-mark barriers.
7586 7584 instruct storeImmP(memory mem, immP src) %{
7587 7585 match(Set mem (StoreP mem src));
7588 7586
7589 7587 ins_cost(150);
7590 7588 format %{ "MOV $mem,$src" %}
7591 7589 opcode(0xC7); /* C7 /0 */
7592 7590 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
7593 7591 ins_pipe( ialu_mem_imm );
7594 7592 %}
7595 7593
7596 7594 // Store Byte Immediate
7597 7595 instruct storeImmB(memory mem, immI8 src) %{
7598 7596 match(Set mem (StoreB mem src));
7599 7597
7600 7598 ins_cost(150);
7601 7599 format %{ "MOV8 $mem,$src" %}
7602 7600 opcode(0xC6); /* C6 /0 */
7603 7601 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7604 7602 ins_pipe( ialu_mem_imm );
7605 7603 %}
7606 7604
7607 7605 // Store Aligned Packed Byte XMM register to memory
7608 7606 instruct storeA8B(memory mem, regXD src) %{
7609 7607 predicate(UseSSE>=1);
7610 7608 match(Set mem (Store8B mem src));
7611 7609 ins_cost(145);
7612 7610 format %{ "MOVQ $mem,$src\t! packed8B" %}
7613 7611 ins_encode( movq_st(mem, src));
7614 7612 ins_pipe( pipe_slow );
7615 7613 %}
7616 7614
7617 7615 // Store Aligned Packed Char/Short XMM register to memory
7618 7616 instruct storeA4C(memory mem, regXD src) %{
7619 7617 predicate(UseSSE>=1);
7620 7618 match(Set mem (Store4C mem src));
7621 7619 ins_cost(145);
7622 7620 format %{ "MOVQ $mem,$src\t! packed4C" %}
7623 7621 ins_encode( movq_st(mem, src));
7624 7622 ins_pipe( pipe_slow );
7625 7623 %}
7626 7624
7627 7625 // Store Aligned Packed Integer XMM register to memory
7628 7626 instruct storeA2I(memory mem, regXD src) %{
7629 7627 predicate(UseSSE>=1);
7630 7628 match(Set mem (Store2I mem src));
7631 7629 ins_cost(145);
7632 7630 format %{ "MOVQ $mem,$src\t! packed2I" %}
7633 7631 ins_encode( movq_st(mem, src));
7634 7632 ins_pipe( pipe_slow );
7635 7633 %}
7636 7634
7637 7635 // Store CMS card-mark Immediate
7638 7636 instruct storeImmCM(memory mem, immI8 src) %{
7639 7637 match(Set mem (StoreCM mem src));
7640 7638
7641 7639 ins_cost(150);
7642 7640 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7643 7641 opcode(0xC6); /* C6 /0 */
7644 7642 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7645 7643 ins_pipe( ialu_mem_imm );
7646 7644 %}
7647 7645
7648 7646 // Store Double
7649 7647 instruct storeD( memory mem, regDPR1 src) %{
7650 7648 predicate(UseSSE<=1);
7651 7649 match(Set mem (StoreD mem src));
7652 7650
7653 7651 ins_cost(100);
7654 7652 format %{ "FST_D $mem,$src" %}
7655 7653 opcode(0xDD); /* DD /2 */
7656 7654 ins_encode( enc_FP_store(mem,src) );
7657 7655 ins_pipe( fpu_mem_reg );
7658 7656 %}
7659 7657
7660 7658 // Store double does rounding on x86
7661 7659 instruct storeD_rounded( memory mem, regDPR1 src) %{
7662 7660 predicate(UseSSE<=1);
7663 7661 match(Set mem (StoreD mem (RoundDouble src)));
7664 7662
7665 7663 ins_cost(100);
7666 7664 format %{ "FST_D $mem,$src\t# round" %}
7667 7665 opcode(0xDD); /* DD /2 */
7668 7666 ins_encode( enc_FP_store(mem,src) );
7669 7667 ins_pipe( fpu_mem_reg );
7670 7668 %}
7671 7669
7672 7670 // Store XMM register to memory (double-precision floating points)
7673 7671 // MOVSD instruction
7674 7672 instruct storeXD(memory mem, regXD src) %{
7675 7673 predicate(UseSSE>=2);
7676 7674 match(Set mem (StoreD mem src));
7677 7675 ins_cost(95);
7678 7676 format %{ "MOVSD $mem,$src" %}
7679 7677 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7680 7678 ins_pipe( pipe_slow );
7681 7679 %}
7682 7680
7683 7681 // Store XMM register to memory (single-precision floating point)
7684 7682 // MOVSS instruction
7685 7683 instruct storeX(memory mem, regX src) %{
7686 7684 predicate(UseSSE>=1);
7687 7685 match(Set mem (StoreF mem src));
7688 7686 ins_cost(95);
7689 7687 format %{ "MOVSS $mem,$src" %}
7690 7688 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7691 7689 ins_pipe( pipe_slow );
7692 7690 %}
7693 7691
7694 7692 // Store Aligned Packed Single Float XMM register to memory
7695 7693 instruct storeA2F(memory mem, regXD src) %{
7696 7694 predicate(UseSSE>=1);
7697 7695 match(Set mem (Store2F mem src));
7698 7696 ins_cost(145);
7699 7697 format %{ "MOVQ $mem,$src\t! packed2F" %}
7700 7698 ins_encode( movq_st(mem, src));
7701 7699 ins_pipe( pipe_slow );
7702 7700 %}
7703 7701
7704 7702 // Store Float
7705 7703 instruct storeF( memory mem, regFPR1 src) %{
7706 7704 predicate(UseSSE==0);
7707 7705 match(Set mem (StoreF mem src));
7708 7706
7709 7707 ins_cost(100);
7710 7708 format %{ "FST_S $mem,$src" %}
7711 7709 opcode(0xD9); /* D9 /2 */
7712 7710 ins_encode( enc_FP_store(mem,src) );
7713 7711 ins_pipe( fpu_mem_reg );
7714 7712 %}
7715 7713
7716 7714 // Store Float does rounding on x86
7717 7715 instruct storeF_rounded( memory mem, regFPR1 src) %{
7718 7716 predicate(UseSSE==0);
7719 7717 match(Set mem (StoreF mem (RoundFloat src)));
7720 7718
7721 7719 ins_cost(100);
7722 7720 format %{ "FST_S $mem,$src\t# round" %}
7723 7721 opcode(0xD9); /* D9 /2 */
7724 7722 ins_encode( enc_FP_store(mem,src) );
7725 7723 ins_pipe( fpu_mem_reg );
7726 7724 %}
7727 7725
7728 7726 // Store Float does rounding on x86
7729 7727 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7730 7728 predicate(UseSSE<=1);
7731 7729 match(Set mem (StoreF mem (ConvD2F src)));
7732 7730
7733 7731 ins_cost(100);
7734 7732 format %{ "FST_S $mem,$src\t# D-round" %}
7735 7733 opcode(0xD9); /* D9 /2 */
7736 7734 ins_encode( enc_FP_store(mem,src) );
7737 7735 ins_pipe( fpu_mem_reg );
7738 7736 %}
7739 7737
7740 7738 // Store immediate Float value (it is faster than store from FPU register)
7741 7739 // The instruction usage is guarded by predicate in operand immF().
7742 7740 instruct storeF_imm( memory mem, immF src) %{
7743 7741 match(Set mem (StoreF mem src));
7744 7742
7745 7743 ins_cost(50);
7746 7744 format %{ "MOV $mem,$src\t# store float" %}
7747 7745 opcode(0xC7); /* C7 /0 */
7748 7746 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7749 7747 ins_pipe( ialu_mem_imm );
7750 7748 %}
7751 7749
7752 7750 // Store immediate Float value (it is faster than store from XMM register)
7753 7751 // The instruction usage is guarded by predicate in operand immXF().
7754 7752 instruct storeX_imm( memory mem, immXF src) %{
7755 7753 match(Set mem (StoreF mem src));
7756 7754
7757 7755 ins_cost(50);
7758 7756 format %{ "MOV $mem,$src\t# store float" %}
7759 7757 opcode(0xC7); /* C7 /0 */
7760 7758 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7761 7759 ins_pipe( ialu_mem_imm );
7762 7760 %}
7763 7761
7764 7762 // Store Integer to stack slot
7765 7763 instruct storeSSI(stackSlotI dst, eRegI src) %{
7766 7764 match(Set dst src);
7767 7765
7768 7766 ins_cost(100);
7769 7767 format %{ "MOV $dst,$src" %}
7770 7768 opcode(0x89);
7771 7769 ins_encode( OpcPRegSS( dst, src ) );
7772 7770 ins_pipe( ialu_mem_reg );
7773 7771 %}
7774 7772
7775 7773 // Store Integer to stack slot
7776 7774 instruct storeSSP(stackSlotP dst, eRegP src) %{
7777 7775 match(Set dst src);
7778 7776
7779 7777 ins_cost(100);
7780 7778 format %{ "MOV $dst,$src" %}
7781 7779 opcode(0x89);
7782 7780 ins_encode( OpcPRegSS( dst, src ) );
7783 7781 ins_pipe( ialu_mem_reg );
7784 7782 %}
7785 7783
7786 7784 // Store Long to stack slot
7787 7785 instruct storeSSL(stackSlotL dst, eRegL src) %{
7788 7786 match(Set dst src);
7789 7787
7790 7788 ins_cost(200);
7791 7789 format %{ "MOV $dst,$src.lo\n\t"
7792 7790 "MOV $dst+4,$src.hi" %}
7793 7791 opcode(0x89, 0x89);
7794 7792 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7795 7793 ins_pipe( ialu_mem_long_reg );
7796 7794 %}
7797 7795
7798 7796 //----------MemBar Instructions-----------------------------------------------
7799 7797 // Memory barrier flavors
7800 7798
7801 7799 instruct membar_acquire() %{
7802 7800 match(MemBarAcquire);
7803 7801 ins_cost(400);
7804 7802
7805 7803 size(0);
7806 7804 format %{ "MEMBAR-acquire ! (empty encoding)" %}
7807 7805 ins_encode();
7808 7806 ins_pipe(empty);
7809 7807 %}
7810 7808
7811 7809 instruct membar_acquire_lock() %{
7812 7810 match(MemBarAcquire);
7813 7811 predicate(Matcher::prior_fast_lock(n));
7814 7812 ins_cost(0);
7815 7813
7816 7814 size(0);
7817 7815 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7818 7816 ins_encode( );
7819 7817 ins_pipe(empty);
7820 7818 %}
7821 7819
7822 7820 instruct membar_release() %{
7823 7821 match(MemBarRelease);
7824 7822 ins_cost(400);
7825 7823
7826 7824 size(0);
7827 7825 format %{ "MEMBAR-release ! (empty encoding)" %}
7828 7826 ins_encode( );
7829 7827 ins_pipe(empty);
7830 7828 %}
7831 7829
7832 7830 instruct membar_release_lock() %{
7833 7831 match(MemBarRelease);
7834 7832 predicate(Matcher::post_fast_unlock(n));
7835 7833 ins_cost(0);
7836 7834
7837 7835 size(0);
7838 7836 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7839 7837 ins_encode( );
7840 7838 ins_pipe(empty);
7841 7839 %}
7842 7840
7843 7841 instruct membar_volatile(eFlagsReg cr) %{
7844 7842 match(MemBarVolatile);
7845 7843 effect(KILL cr);
7846 7844 ins_cost(400);
7847 7845
7848 7846 format %{
7849 7847 $$template
7850 7848 if (os::is_MP()) {
7851 7849 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7852 7850 } else {
7853 7851 $$emit$$"MEMBAR-volatile ! (empty encoding)"
7854 7852 }
7855 7853 %}
7856 7854 ins_encode %{
7857 7855 __ membar(Assembler::StoreLoad);
7858 7856 %}
7859 7857 ins_pipe(pipe_slow);
7860 7858 %}
7861 7859
7862 7860 instruct unnecessary_membar_volatile() %{
7863 7861 match(MemBarVolatile);
7864 7862 predicate(Matcher::post_store_load_barrier(n));
7865 7863 ins_cost(0);
7866 7864
7867 7865 size(0);
7868 7866 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7869 7867 ins_encode( );
7870 7868 ins_pipe(empty);
7871 7869 %}
7872 7870
7873 7871 //----------Move Instructions--------------------------------------------------
7874 7872 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7875 7873 match(Set dst (CastX2P src));
7876 7874 format %{ "# X2P $dst, $src" %}
7877 7875 ins_encode( /*empty encoding*/ );
7878 7876 ins_cost(0);
7879 7877 ins_pipe(empty);
7880 7878 %}
7881 7879
7882 7880 instruct castP2X(eRegI dst, eRegP src ) %{
7883 7881 match(Set dst (CastP2X src));
7884 7882 ins_cost(50);
7885 7883 format %{ "MOV $dst, $src\t# CastP2X" %}
7886 7884 ins_encode( enc_Copy( dst, src) );
7887 7885 ins_pipe( ialu_reg_reg );
7888 7886 %}
7889 7887
7890 7888 //----------Conditional Move---------------------------------------------------
7891 7889 // Conditional move
7892 7890 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7893 7891 predicate(VM_Version::supports_cmov() );
7894 7892 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7895 7893 ins_cost(200);
7896 7894 format %{ "CMOV$cop $dst,$src" %}
7897 7895 opcode(0x0F,0x40);
7898 7896 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7899 7897 ins_pipe( pipe_cmov_reg );
7900 7898 %}
7901 7899
7902 7900 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7903 7901 predicate(VM_Version::supports_cmov() );
7904 7902 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7905 7903 ins_cost(200);
7906 7904 format %{ "CMOV$cop $dst,$src" %}
7907 7905 opcode(0x0F,0x40);
7908 7906 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7909 7907 ins_pipe( pipe_cmov_reg );
7910 7908 %}
7911 7909
7912 7910 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7913 7911 predicate(VM_Version::supports_cmov() );
7914 7912 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7915 7913 ins_cost(200);
7916 7914 expand %{
7917 7915 cmovI_regU(cop, cr, dst, src);
7918 7916 %}
7919 7917 %}
7920 7918
7921 7919 // Conditional move
7922 7920 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7923 7921 predicate(VM_Version::supports_cmov() );
7924 7922 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7925 7923 ins_cost(250);
7926 7924 format %{ "CMOV$cop $dst,$src" %}
7927 7925 opcode(0x0F,0x40);
7928 7926 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7929 7927 ins_pipe( pipe_cmov_mem );
7930 7928 %}
7931 7929
7932 7930 // Conditional move
7933 7931 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7934 7932 predicate(VM_Version::supports_cmov() );
7935 7933 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7936 7934 ins_cost(250);
7937 7935 format %{ "CMOV$cop $dst,$src" %}
7938 7936 opcode(0x0F,0x40);
7939 7937 ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7940 7938 ins_pipe( pipe_cmov_mem );
7941 7939 %}
7942 7940
7943 7941 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7944 7942 predicate(VM_Version::supports_cmov() );
7945 7943 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7946 7944 ins_cost(250);
7947 7945 expand %{
7948 7946 cmovI_memU(cop, cr, dst, src);
7949 7947 %}
7950 7948 %}
7951 7949
7952 7950 // Conditional move
7953 7951 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7954 7952 predicate(VM_Version::supports_cmov() );
7955 7953 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7956 7954 ins_cost(200);
7957 7955 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7958 7956 opcode(0x0F,0x40);
7959 7957 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7960 7958 ins_pipe( pipe_cmov_reg );
7961 7959 %}
7962 7960
7963 7961 // Conditional move (non-P6 version)
7964 7962 // Note: a CMoveP is generated for stubs and native wrappers
7965 7963 // regardless of whether we are on a P6, so we
7966 7964 // emulate a cmov here
7967 7965 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7968 7966 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7969 7967 ins_cost(300);
7970 7968 format %{ "Jn$cop skip\n\t"
7971 7969 "MOV $dst,$src\t# pointer\n"
7972 7970 "skip:" %}
7973 7971 opcode(0x8b);
7974 7972 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7975 7973 ins_pipe( pipe_cmov_reg );
7976 7974 %}
7977 7975
7978 7976 // Conditional move
7979 7977 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7980 7978 predicate(VM_Version::supports_cmov() );
7981 7979 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7982 7980 ins_cost(200);
7983 7981 format %{ "CMOV$cop $dst,$src\t# ptr" %}
7984 7982 opcode(0x0F,0x40);
7985 7983 ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7986 7984 ins_pipe( pipe_cmov_reg );
7987 7985 %}
7988 7986
7989 7987 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7990 7988 predicate(VM_Version::supports_cmov() );
7991 7989 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7992 7990 ins_cost(200);
7993 7991 expand %{
7994 7992 cmovP_regU(cop, cr, dst, src);
7995 7993 %}
7996 7994 %}
7997 7995
7998 7996 // DISABLED: Requires the ADLC to emit a bottom_type call that
7999 7997 // correctly meets the two pointer arguments; one is an incoming
8000 7998 // register but the other is a memory operand. ALSO appears to
8001 7999 // be buggy with implicit null checks.
8002 8000 //
8003 8001 //// Conditional move
8004 8002 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
8005 8003 // predicate(VM_Version::supports_cmov() );
8006 8004 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8007 8005 // ins_cost(250);
8008 8006 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
8009 8007 // opcode(0x0F,0x40);
8010 8008 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8011 8009 // ins_pipe( pipe_cmov_mem );
8012 8010 //%}
8013 8011 //
8014 8012 //// Conditional move
8015 8013 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
8016 8014 // predicate(VM_Version::supports_cmov() );
8017 8015 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8018 8016 // ins_cost(250);
8019 8017 // format %{ "CMOV$cop $dst,$src\t# ptr" %}
8020 8018 // opcode(0x0F,0x40);
8021 8019 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8022 8020 // ins_pipe( pipe_cmov_mem );
8023 8021 //%}
8024 8022
8025 8023 // Conditional move
8026 8024 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
8027 8025 predicate(UseSSE<=1);
8028 8026 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8029 8027 ins_cost(200);
8030 8028 format %{ "FCMOV$cop $dst,$src\t# double" %}
8031 8029 opcode(0xDA);
8032 8030 ins_encode( enc_cmov_d(cop,src) );
8033 8031 ins_pipe( pipe_cmovD_reg );
8034 8032 %}
8035 8033
8036 8034 // Conditional move
8037 8035 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
8038 8036 predicate(UseSSE==0);
8039 8037 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8040 8038 ins_cost(200);
8041 8039 format %{ "FCMOV$cop $dst,$src\t# float" %}
8042 8040 opcode(0xDA);
8043 8041 ins_encode( enc_cmov_d(cop,src) );
8044 8042 ins_pipe( pipe_cmovD_reg );
8045 8043 %}
8046 8044
8047 8045 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8048 8046 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
8049 8047 predicate(UseSSE<=1);
8050 8048 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8051 8049 ins_cost(200);
8052 8050 format %{ "Jn$cop skip\n\t"
8053 8051 "MOV $dst,$src\t# double\n"
8054 8052 "skip:" %}
8055 8053 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8056 8054 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
8057 8055 ins_pipe( pipe_cmovD_reg );
8058 8056 %}
8059 8057
8060 8058 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8061 8059 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
8062 8060 predicate(UseSSE==0);
8063 8061 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8064 8062 ins_cost(200);
8065 8063 format %{ "Jn$cop skip\n\t"
8066 8064 "MOV $dst,$src\t# float\n"
8067 8065 "skip:" %}
8068 8066 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8069 8067 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
8070 8068 ins_pipe( pipe_cmovD_reg );
8071 8069 %}
8072 8070
8073 8071 // No CMOVE with SSE/SSE2
8074 8072 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
8075 8073 predicate (UseSSE>=1);
8076 8074 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8077 8075 ins_cost(200);
8078 8076 format %{ "Jn$cop skip\n\t"
8079 8077 "MOVSS $dst,$src\t# float\n"
8080 8078 "skip:" %}
8081 8079 ins_encode %{
8082 8080 Label skip;
8083 8081 // Invert sense of branch from sense of CMOV
8084 8082 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8085 8083 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8086 8084 __ bind(skip);
8087 8085 %}
8088 8086 ins_pipe( pipe_slow );
8089 8087 %}
8090 8088
8091 8089 // No CMOVE with SSE/SSE2
8092 8090 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
8093 8091 predicate (UseSSE>=2);
8094 8092 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8095 8093 ins_cost(200);
8096 8094 format %{ "Jn$cop skip\n\t"
8097 8095 "MOVSD $dst,$src\t# float\n"
8098 8096 "skip:" %}
8099 8097 ins_encode %{
8100 8098 Label skip;
8101 8099 // Invert sense of branch from sense of CMOV
8102 8100 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8103 8101 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8104 8102 __ bind(skip);
8105 8103 %}
8106 8104 ins_pipe( pipe_slow );
8107 8105 %}
8108 8106
8109 8107 // unsigned version
// fcmovX_regU: as fcmovX_regS but for unsigned compare conditions
// (cmpOpU / eFlagsRegU).
8110 8108 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
8111 8109 predicate (UseSSE>=1);
8112 8110 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8113 8111 ins_cost(200);
8114 8112 format %{ "Jn$cop skip\n\t"
8115 8113 "MOVSS $dst,$src\t# float\n"
8116 8114 "skip:" %}
8117 8115 ins_encode %{
8118 8116 Label skip;
8119 8117 // Invert sense of branch from sense of CMOV
8120 8118 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8121 8119 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8122 8120 __ bind(skip);
8123 8121 %}
8124 8122 ins_pipe( pipe_slow );
8125 8123 %}
8126 8124 
// fcmovX_regUCF: carry-flag-only unsigned variant; simply expands to
// fcmovX_regU with the same operands.
8127 8125 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
8128 8126 predicate (UseSSE>=1);
8129 8127 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8130 8128 ins_cost(200);
8131 8129 expand %{
8132 8130 fcmovX_regU(cop, cr, dst, src);
8133 8131 %}
8134 8132 %}
8135 8133
8136 8134 // unsigned version
// fcmovXD_regU: conditional move of a double between XMM registers for
// unsigned compare conditions, synthesized as an inverted-condition
// short branch around MOVSD.
8137 8135 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
8138 8136 predicate (UseSSE>=2);
8139 8137 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8140 8138 ins_cost(200);
8141 8139 format %{ "Jn$cop skip\n\t"
8142 8140 "MOVSD $dst,$src\t# double\n"
8143 8141 "skip:" %}
8144 8142 ins_encode %{
8145 8143 Label skip;
8146 8144 // Invert sense of branch from sense of CMOV
8147 8145 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8148 8146 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8149 8147 __ bind(skip);
8150 8148 %}
8151 8149 ins_pipe( pipe_slow );
8152 8150 %}
8153 8151
// fcmovXD_regUCF: carry-flag-only unsigned double variant; expands to
// fcmovXD_regU.
8154 8152 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8155 8153 predicate (UseSSE>=2);
8156 8154 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8157 8155 ins_cost(200);
8158 8156 expand %{
8159 8157 fcmovXD_regU(cop, cr, dst, src);
8160 8158 %}
8161 8159 %}
8162 8160 
// cmovL_reg: 64-bit conditional move on 32-bit x86 — two CMOVcc
// instructions, one for each half of the long register pair.
// Requires hardware CMOV support.
8163 8161 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8164 8162 predicate(VM_Version::supports_cmov() );
8165 8163 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8166 8164 ins_cost(200);
8167 8165 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8168 8166 "CMOV$cop $dst.hi,$src.hi" %}
8169 8167 opcode(0x0F,0x40);
8170 8168 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8171 8169 ins_pipe( pipe_cmov_reg_long );
8172 8170 %}
8173 8171 
// cmovL_regU: same as cmovL_reg, for unsigned compare conditions.
8174 8172 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8175 8173 predicate(VM_Version::supports_cmov() );
8176 8174 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8177 8175 ins_cost(200);
8178 8176 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8179 8177 "CMOV$cop $dst.hi,$src.hi" %}
8180 8178 opcode(0x0F,0x40);
8181 8179 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8182 8180 ins_pipe( pipe_cmov_reg_long );
8183 8181 %}
8184 8182 
// cmovL_regUCF: carry-flag-only variant; expands to cmovL_regU.
8185 8183 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8186 8184 predicate(VM_Version::supports_cmov() );
8187 8185 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8188 8186 ins_cost(200);
8189 8187 expand %{
8190 8188 cmovL_regU(cop, cr, dst, src);
8191 8189 %}
8192 8190 %}
8193 8191
8194 8192 //----------Arithmetic Instructions--------------------------------------------
8195 8193 //----------Addition Instructions----------------------------------------------
8196 8194 // Integer Addition Instructions
// addI_eReg: 32-bit register+register ADD; clobbers condition flags.
8197 8195 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8198 8196 match(Set dst (AddI dst src));
8199 8197 effect(KILL cr);
8200 8198 
8201 8199 size(2);
8202 8200 format %{ "ADD $dst,$src" %}
8203 8201 opcode(0x03);
8204 8202 ins_encode( OpcP, RegReg( dst, src) );
8205 8203 ins_pipe( ialu_reg_reg );
8206 8204 %}
8207 8205 
// addI_eReg_imm: ADD register, immediate (8- or 32-bit immediate form
// chosen by the OpcSErm/Con8or32 encoders).
8208 8206 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8209 8207 match(Set dst (AddI dst src));
8210 8208 effect(KILL cr);
8211 8209 
8212 8210 format %{ "ADD $dst,$src" %}
8213 8211 opcode(0x81, 0x00); /* /0 id */
8214 8212 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215 8213 ins_pipe( ialu_reg );
8216 8214 %}
8217 8215 
// incI_eReg: add-one matched to the one-byte INC encoding (0x40+reg),
// guarded by UseIncDec.
8218 8216 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8219 8217 predicate(UseIncDec);
8220 8218 match(Set dst (AddI dst src));
8221 8219 effect(KILL cr);
8222 8220 
8223 8221 size(1);
8224 8222 format %{ "INC $dst" %}
8225 8223 opcode(0x40); /* */
8226 8224 ins_encode( Opc_plus( primary, dst ) );
8227 8225 ins_pipe( ialu_reg );
8228 8226 %}
8229 8227 
// leaI_eReg_immI: three-operand add via LEA — does not clobber flags.
8230 8228 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8231 8229 match(Set dst (AddI src0 src1));
8232 8230 ins_cost(110);
8233 8231 
8234 8232 format %{ "LEA $dst,[$src0 + $src1]" %}
8235 8233 opcode(0x8D); /* 0x8D /r */
8236 8234 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8237 8235 ins_pipe( ialu_reg_reg );
8238 8236 %}
8239 8237 
// leaP_eReg_immI: pointer version of the LEA add.
8240 8238 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8241 8239 match(Set dst (AddP src0 src1));
8242 8240 ins_cost(110);
8243 8241 
8244 8242 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8245 8243 opcode(0x8D); /* 0x8D /r */
8246 8244 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8247 8245 ins_pipe( ialu_reg_reg );
8248 8246 %}
8249 8247 
// decI_eReg: add of -1 matched to the one-byte DEC encoding (0x48+reg).
8250 8248 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8251 8249 predicate(UseIncDec);
8252 8250 match(Set dst (AddI dst src));
8253 8251 effect(KILL cr);
8254 8252 
8255 8253 size(1);
8256 8254 format %{ "DEC $dst" %}
8257 8255 opcode(0x48); /* */
8258 8256 ins_encode( Opc_plus( primary, dst ) );
8259 8257 ins_pipe( ialu_reg );
8260 8258 %}
8261 8259
// addP_eReg: pointer += int register; clobbers flags.
8262 8260 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8263 8261 match(Set dst (AddP dst src));
8264 8262 effect(KILL cr);
8265 8263 
8266 8264 size(2);
8267 8265 format %{ "ADD $dst,$src" %}
8268 8266 opcode(0x03);
8269 8267 ins_encode( OpcP, RegReg( dst, src) );
8270 8268 ins_pipe( ialu_reg_reg );
8271 8269 %}
8272 8270 
// addP_eReg_imm: pointer += immediate (8/32-bit immediate encoding).
8273 8271 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8274 8272 match(Set dst (AddP dst src));
8275 8273 effect(KILL cr);
8276 8274 
8277 8275 format %{ "ADD $dst,$src" %}
8278 8276 opcode(0x81,0x00); /* Opcode 81 /0 id */
8279 8277 // ins_encode( RegImm( dst, src) );
8280 8278 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8281 8279 ins_pipe( ialu_reg );
8282 8280 %}
8283 8281 
// addI_eReg_mem: register += memory operand.
8284 8282 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8285 8283 match(Set dst (AddI dst (LoadI src)));
8286 8284 effect(KILL cr);
8287 8285 
8288 8286 ins_cost(125);
8289 8287 format %{ "ADD $dst,$src" %}
8290 8288 opcode(0x03);
8291 8289 ins_encode( OpcP, RegMem( dst, src) );
8292 8290 ins_pipe( ialu_reg_mem );
8293 8291 %}
8294 8292 
// addI_mem_eReg: read-modify-write add of a register into memory.
8295 8293 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8296 8294 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8297 8295 effect(KILL cr);
8298 8296 
8299 8297 ins_cost(150);
8300 8298 format %{ "ADD $dst,$src" %}
8301 8299 opcode(0x01); /* Opcode 01 /r */
8302 8300 ins_encode( OpcP, RegMem( src, dst ) );
8303 8301 ins_pipe( ialu_mem_reg );
8304 8302 %}
8305 8303 
8306 8304 // Add Memory with Immediate
8307 8305 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8308 8306 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8309 8307 effect(KILL cr);
8310 8308 
8311 8309 ins_cost(125);
8312 8310 format %{ "ADD $dst,$src" %}
8313 8311 opcode(0x81); /* Opcode 81 /0 id */
8314 8312 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8315 8313 ins_pipe( ialu_mem_imm );
8316 8314 %}
8317 8315 
// incI_mem: memory add-one via INC m32 (FF /0).
8318 8316 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8319 8317 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8320 8318 effect(KILL cr);
8321 8319 
8322 8320 ins_cost(125);
8323 8321 format %{ "INC $dst" %}
8324 8322 opcode(0xFF); /* Opcode FF /0 */
8325 8323 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8326 8324 ins_pipe( ialu_mem_imm );
8327 8325 %}
8328 8326 
// decI_mem: memory add of -1 via DEC m32 (FF /1).
8329 8327 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8330 8328 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8331 8329 effect(KILL cr);
8332 8330 
8333 8331 ins_cost(125);
8334 8332 format %{ "DEC $dst" %}
8335 8333 opcode(0xFF); /* Opcode FF /1 */
8336 8334 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8337 8335 ins_pipe( ialu_mem_imm );
8338 8336 %}
8339 8337
8340 8338
// checkCastPP / castPP / castII: compiler-only cast nodes; they emit no
// machine code (empty encodings) and exist to carry type information.
8341 8339 instruct checkCastPP( eRegP dst ) %{
8342 8340 match(Set dst (CheckCastPP dst));
8343 8341 
8344 8342 size(0);
8345 8343 format %{ "#checkcastPP of $dst" %}
8346 8344 ins_encode( /*empty encoding*/ );
8347 8345 ins_pipe( empty );
8348 8346 %}
8349 8347 
8350 8348 instruct castPP( eRegP dst ) %{
8351 8349 match(Set dst (CastPP dst));
8352 8350 format %{ "#castPP of $dst" %}
8353 8351 ins_encode( /*empty encoding*/ );
8354 8352 ins_pipe( empty );
8355 8353 %}
8356 8354 
8357 8355 instruct castII( eRegI dst ) %{
8358 8356 match(Set dst (CastII dst));
8359 8357 format %{ "#castII of $dst" %}
8360 8358 ins_encode( /*empty encoding*/ );
8361 8359 ins_cost(0);
8362 8360 ins_pipe( empty );
8363 8361 %}
8364 8362 
8365 8363 
8366 8364 // Load-locked - same as a regular pointer load when used with compare-swap
8367 8365 instruct loadPLocked(eRegP dst, memory mem) %{
8368 8366 match(Set dst (LoadPLocked mem));
8369 8367 
8370 8368 ins_cost(125);
8371 8369 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
8372 8370 opcode(0x8B);
8373 8371 ins_encode( OpcP, RegMem(dst,mem));
8374 8372 ins_pipe( ialu_reg_mem );
8375 8373 %}
8376 8374 
8377 8375 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 path (UseSSE<=1): FILD/FISTP gives a single atomic 64-bit access.
8378 8376 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8379 8377 predicate(UseSSE<=1);
8380 8378 match(Set dst (LoadLLocked mem));
8381 8379 
8382 8380 ins_cost(200);
8383 8381 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
8384 8382 "FISTp $dst" %}
8385 8383 ins_encode(enc_loadL_volatile(mem,dst));
8386 8384 ins_pipe( fpu_reg_mem );
8387 8385 %}
8388 8386 
// SSE2 path: a single MOVSD through an XMM temp, result to a stack slot.
8389 8387 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8390 8388 predicate(UseSSE>=2);
8391 8389 match(Set dst (LoadLLocked mem));
8392 8390 effect(TEMP tmp);
8393 8391 ins_cost(180);
8394 8392 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8395 8393 "MOVSD $dst,$tmp" %}
8396 8394 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8397 8395 ins_pipe( pipe_slow );
8398 8396 %}
8399 8397 
// SSE2 path to a GPR pair: MOVSD then split halves out via MOVD/PSRLQ.
8400 8398 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8401 8399 predicate(UseSSE>=2);
8402 8400 match(Set dst (LoadLLocked mem));
8403 8401 effect(TEMP tmp);
8404 8402 ins_cost(160);
8405 8403 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8406 8404 "MOVD $dst.lo,$tmp\n\t"
8407 8405 "PSRLQ $tmp,32\n\t"
8408 8406 "MOVD $dst.hi,$tmp" %}
8409 8407 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8410 8408 ins_pipe( pipe_slow );
8411 8409 %}
8412 8410
8413 8411 // Conditional-store of the updated heap-top.
8414 8412 // Used during allocation of the shared heap.
8415 8413 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
8416 8414 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
8417 8415 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8418 8416 // EAX is killed if there is contention, but then it's also unused.
8419 8417 // In the common case of no contention, EAX holds the new oop address.
8420 8418 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
8421 8419 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
8422 8420 ins_pipe( pipe_cmpxchg );
8423 8421 %}
8424 8422 
8425 8423 // Conditional-store of an int value.
8426 8424 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8427 8425 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8428 8426 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8429 8427 effect(KILL oldval);
8430 8428 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8431 8429 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8432 8430 ins_pipe( pipe_cmpxchg );
8433 8431 %}
8434 8432 
8435 8433 // Conditional-store of a long value.
8436 8434 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
8437 8435 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8438 8436 match(Set cr (StoreLConditional mem (Binary oldval newval)));
8439 8437 effect(KILL oldval);
8440 8438 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8441 8439 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8442 8440 "XCHG EBX,ECX"
8443 8441 %}
8444 8442 ins_encode %{
8445 8443 // Note: we need to swap rbx, and rcx before and after the
8446 8444 // cmpxchg8 instruction because the instruction uses
8447 8445 // rcx as the high order word of the new value to store but
8448 8446 // our register encoding uses rbx.
8449 8447 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8450 8448 if( os::is_MP() )
8451 8449 __ lock();
8452 8450 __ cmpxchg8($mem$$Address);
8453 8451 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8454 8452 %}
8455 8453 ins_pipe( pipe_cmpxchg );
8456 8454 %}
8457 8455 
8458 8456 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8459 8457 
// compareAndSwapL: 64-bit CAS via CMPXCHG8B; res is set from ZF
// (1 on success, 0 on failure) by enc_flags_ne_to_boolean.
8460 8458 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8461 8459 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8462 8460 effect(KILL cr, KILL oldval);
8463 8461 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8464 8462 "MOV $res,0\n\t"
8465 8463 "JNE,s fail\n\t"
8466 8464 "MOV $res,1\n"
8467 8465 "fail:" %}
8468 8466 ins_encode( enc_cmpxchg8(mem_ptr),
8469 8467 enc_flags_ne_to_boolean(res) );
8470 8468 ins_pipe( pipe_cmpxchg );
8471 8469 %}
8472 8470 
// compareAndSwapP: pointer CAS via CMPXCHG, expected value in EAX.
8473 8471 instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
8474 8472 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8475 8473 effect(KILL cr, KILL oldval);
8476 8474 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8477 8475 "MOV $res,0\n\t"
8478 8476 "JNE,s fail\n\t"
8479 8477 "MOV $res,1\n"
8480 8478 "fail:" %}
8481 8479 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8482 8480 ins_pipe( pipe_cmpxchg );
8483 8481 %}
8484 8482 
// compareAndSwapI: 32-bit int CAS, same shape as compareAndSwapP.
8485 8483 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
8486 8484 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8487 8485 effect(KILL cr, KILL oldval);
8488 8486 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8489 8487 "MOV $res,0\n\t"
8490 8488 "JNE,s fail\n\t"
8491 8489 "MOV $res,1\n"
8492 8490 "fail:" %}
8493 8491 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8494 8492 ins_pipe( pipe_cmpxchg );
8495 8493 %}
8496 8494
8497 8495 //----------Subtraction Instructions-------------------------------------------
8498 8496 // Integer Subtraction Instructions
// subI_eReg: 32-bit register-register SUB; clobbers flags.
8499 8497 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8500 8498 match(Set dst (SubI dst src));
8501 8499 effect(KILL cr);
8502 8500 
8503 8501 size(2);
8504 8502 format %{ "SUB $dst,$src" %}
8505 8503 opcode(0x2B);
8506 8504 ins_encode( OpcP, RegReg( dst, src) );
8507 8505 ins_pipe( ialu_reg_reg );
8508 8506 %}
8509 8507 
// subI_eReg_imm: SUB register, immediate (81 /5, 8/32-bit form).
8510 8508 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8511 8509 match(Set dst (SubI dst src));
8512 8510 effect(KILL cr);
8513 8511 
8514 8512 format %{ "SUB $dst,$src" %}
8515 8513 opcode(0x81,0x05); /* Opcode 81 /5 */
8516 8514 // ins_encode( RegImm( dst, src) );
8517 8515 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8518 8516 ins_pipe( ialu_reg );
8519 8517 %}
8520 8518 
// subI_eReg_mem: register -= memory operand.
8521 8519 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8522 8520 match(Set dst (SubI dst (LoadI src)));
8523 8521 effect(KILL cr);
8524 8522 
8525 8523 ins_cost(125);
8526 8524 format %{ "SUB $dst,$src" %}
8527 8525 opcode(0x2B);
8528 8526 ins_encode( OpcP, RegMem( dst, src) );
8529 8527 ins_pipe( ialu_reg_mem );
8530 8528 %}
8531 8529 
// subI_mem_eReg: read-modify-write subtract of a register from memory.
8532 8530 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8533 8531 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8534 8532 effect(KILL cr);
8535 8533 
8536 8534 ins_cost(150);
8537 8535 format %{ "SUB $dst,$src" %}
8538 8536 opcode(0x29); /* Opcode 29 /r */
8539 8537 ins_encode( OpcP, RegMem( src, dst ) );
8540 8538 ins_pipe( ialu_mem_reg );
8541 8539 %}
8542 8540 
8543 8541 // Subtract from a pointer
// subP_eReg: matches ptr + (0 - src) and emits a plain SUB.
8544 8542 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8545 8543 match(Set dst (AddP dst (SubI zero src)));
8546 8544 effect(KILL cr);
8547 8545 
8548 8546 size(2);
8549 8547 format %{ "SUB $dst,$src" %}
8550 8548 opcode(0x2B);
8551 8549 ins_encode( OpcP, RegReg( dst, src) );
8552 8550 ins_pipe( ialu_reg_reg );
8553 8551 %}
8554 8552 
// negI_eReg: matches 0 - dst and emits NEG (F7 /3).
8555 8553 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
8556 8554 match(Set dst (SubI zero dst));
8557 8555 effect(KILL cr);
8558 8556 
8559 8557 size(2);
8560 8558 format %{ "NEG $dst" %}
8561 8559 opcode(0xF7,0x03); // Opcode F7 /3
8562 8560 ins_encode( OpcP, RegOpc( dst ) );
8563 8561 ins_pipe( ialu_reg );
8564 8562 %}
8565 8563
8566 8564
8567 8565 //----------Multiplication/Division Instructions-------------------------------
8568 8566 // Integer Multiplication Instructions
8569 8567 // Multiply Register
8570 8568 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8571 8569 match(Set dst (MulI dst src));
8572 8570 effect(KILL cr);
8573 8571 
8574 8572 size(3);
8575 8573 ins_cost(300);
8576 8574 format %{ "IMUL $dst,$src" %}
8577 8575 opcode(0xAF, 0x0F);
8578 8576 ins_encode( OpcS, OpcP, RegReg( dst, src) );
8579 8577 ins_pipe( ialu_reg_reg_alu0 );
8580 8578 %}
8581 8579 
8582 8580 // Multiply 32-bit Immediate
// mulI_eReg_imm: three-operand IMUL r32, r/m32, imm (69 /r id).
8583 8581 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
8584 8582 match(Set dst (MulI src imm));
8585 8583 effect(KILL cr);
8586 8584 
8587 8585 ins_cost(300);
8588 8586 format %{ "IMUL $dst,$src,$imm" %}
8589 8587 opcode(0x69); /* 69 /r id */
8590 8588 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8591 8589 ins_pipe( ialu_reg_reg_alu0 );
8592 8590 %}
8593 8591 
// loadConL_low_only: loads a 32-bit-representable long constant into EAX
// only; deliberately overpriced so it is selected only by the
// mulI_imm_high patterns below (which consume just the low word).
8594 8592 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
8595 8593 match(Set dst src);
8596 8594 effect(KILL cr);
8597 8595 
8598 8596 // Note that this is artificially increased to make it more expensive than loadConL
8599 8597 ins_cost(250);
8600 8598 format %{ "MOV EAX,$src\t// low word only" %}
8601 8599 opcode(0xB8);
8602 8600 ins_encode( LdImmL_Lo(dst, src) );
8603 8601 ins_pipe( ialu_reg_fat );
8604 8602 %}
8605 8603 
8606 8604 // Multiply by 32-bit Immediate, taking the shifted high order results
8607 8605 // (special case for shift by 32)
8608 8606 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8609 8607 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8610 8608 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8611 8609 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8612 8610 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8613 8611 effect(USE src1, KILL cr);
8614 8612 
8615 8613 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8616 8614 ins_cost(0*100 + 1*400 - 150);
8617 8615 format %{ "IMUL EDX:EAX,$src1" %}
8618 8616 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8619 8617 ins_pipe( pipe_slow );
8620 8618 %}
8621 8619 
8622 8620 // Multiply by 32-bit Immediate, taking the shifted high order results
// Same as mulI_imm_high, but for shift counts 33..63 (SAR after IMUL).
8623 8621 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8624 8622 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8625 8623 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8626 8624 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8627 8625 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8628 8626 effect(USE src1, KILL cr);
8629 8627 
8630 8628 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8631 8629 ins_cost(1*100 + 1*400 - 150);
8632 8630 format %{ "IMUL EDX:EAX,$src1\n\t"
8633 8631 "SAR EDX,$cnt-32" %}
8634 8632 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8635 8633 ins_pipe( pipe_slow );
8636 8634 %}
8637 8635 
8638 8636 // Multiply Memory 32-bit Immediate
8639 8637 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8640 8638 match(Set dst (MulI (LoadI src) imm));
8641 8639 effect(KILL cr);
8642 8640 
8643 8641 ins_cost(300);
8644 8642 format %{ "IMUL $dst,$src,$imm" %}
8645 8643 opcode(0x69); /* 69 /r id */
8646 8644 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8647 8645 ins_pipe( ialu_reg_mem_alu0 );
8648 8646 %}
8649 8647 
8650 8648 // Multiply Memory
8651 8649 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8652 8650 match(Set dst (MulI dst (LoadI src)));
8653 8651 effect(KILL cr);
8654 8652 
8655 8653 ins_cost(350);
8656 8654 format %{ "IMUL $dst,$src" %}
8657 8655 opcode(0xAF, 0x0F);
8658 8656 ins_encode( OpcS, OpcP, RegMem( dst, src) );
8659 8657 ins_pipe( ialu_reg_mem_alu0 );
8660 8658 %}
8661 8659 
8662 8660 // Multiply Register Int to Long
8663 8661 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8664 8662 // Basic Idea: long = (long)int * (long)int
8665 8663 match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8666 8664 effect(DEF dst, USE src, USE src1, KILL flags);
8667 8665 
8668 8666 ins_cost(300);
8669 8667 format %{ "IMUL $dst,$src1" %}
8670 8668 
8671 8669 ins_encode( long_int_multiply( dst, src1 ) );
8672 8670 ins_pipe( ialu_reg_reg_alu0 );
8673 8671 %}
8674 8672 
// mulIS_eReg: unsigned widening multiply — both operands zero-extended.
8675 8673 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8676 8674 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
8677 8675 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8678 8676 effect(KILL flags);
8679 8677 
8680 8678 ins_cost(300);
8681 8679 format %{ "MUL $dst,$src1" %}
8682 8680 
8683 8681 ins_encode( long_uint_multiply(dst, src1) );
8684 8682 ins_pipe( ialu_reg_reg_alu0 );
8685 8683 %}
8686 8684
8687 8685 // Multiply Register Long
// mulL_eReg: full 64x64->64 multiply on 32-bit x86 via three 32-bit
// multiplies and adds (cross products into the high word).
8688 8686 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8689 8687 match(Set dst (MulL dst src));
8690 8688 effect(KILL cr, TEMP tmp);
8691 8689 ins_cost(4*100+3*400);
8692 8690 // Basic idea: lo(result) = lo(x_lo * y_lo)
8693 8691 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8694 8692 format %{ "MOV $tmp,$src.lo\n\t"
8695 8693 "IMUL $tmp,EDX\n\t"
8696 8694 "MOV EDX,$src.hi\n\t"
8697 8695 "IMUL EDX,EAX\n\t"
8698 8696 "ADD $tmp,EDX\n\t"
8699 8697 "MUL EDX:EAX,$src.lo\n\t"
8700 8698 "ADD EDX,$tmp" %}
8701 8699 ins_encode( long_multiply( dst, src, tmp ) );
8702 8700 ins_pipe( pipe_slow );
8703 8701 %}
8704 8702 
8705 8703 // Multiply Register Long where the left operand's high 32 bits are zero
8706 8704 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8707 8705 predicate(is_operand_hi32_zero(n->in(1)));
8708 8706 match(Set dst (MulL dst src));
8709 8707 effect(KILL cr, TEMP tmp);
8710 8708 ins_cost(2*100+2*400);
8711 8709 // Basic idea: lo(result) = lo(x_lo * y_lo)
8712 8710 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8713 8711 format %{ "MOV $tmp,$src.hi\n\t"
8714 8712 "IMUL $tmp,EAX\n\t"
8715 8713 "MUL EDX:EAX,$src.lo\n\t"
8716 8714 "ADD EDX,$tmp" %}
8717 8715 ins_encode %{
8718 8716 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8719 8717 __ imull($tmp$$Register, rax);
8720 8718 __ mull($src$$Register);
8721 8719 __ addl(rdx, $tmp$$Register);
8722 8720 %}
8723 8721 ins_pipe( pipe_slow );
8724 8722 %}
8725 8723 
8726 8724 // Multiply Register Long where the right operand's high 32 bits are zero
8727 8725 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8728 8726 predicate(is_operand_hi32_zero(n->in(2)));
8729 8727 match(Set dst (MulL dst src));
8730 8728 effect(KILL cr, TEMP tmp);
8731 8729 ins_cost(2*100+2*400);
8732 8730 // Basic idea: lo(result) = lo(x_lo * y_lo)
8733 8731 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8734 8732 format %{ "MOV $tmp,$src.lo\n\t"
8735 8733 "IMUL $tmp,EDX\n\t"
8736 8734 "MUL EDX:EAX,$src.lo\n\t"
8737 8735 "ADD EDX,$tmp" %}
8738 8736 ins_encode %{
8739 8737 __ movl($tmp$$Register, $src$$Register);
8740 8738 __ imull($tmp$$Register, rdx);
8741 8739 __ mull($src$$Register);
8742 8740 __ addl(rdx, $tmp$$Register);
8743 8741 %}
8744 8742 ins_pipe( pipe_slow );
8745 8743 %}
8746 8744 
8747 8745 // Multiply Register Long where the left and the right operands' high 32 bits are zero
8748 8746 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
8749 8747 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8750 8748 match(Set dst (MulL dst src));
8751 8749 effect(KILL cr);
8752 8750 ins_cost(1*400);
8753 8751 // Basic idea: lo(result) = lo(x_lo * y_lo)
8754 8752 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8755 8753 format %{ "MUL EDX:EAX,$src.lo\n\t" %}
8756 8754 ins_encode %{
8757 8755 __ mull($src$$Register);
8758 8756 %}
8759 8757 ins_pipe( pipe_slow );
8760 8758 %}
8761 8759 
8762 8760 // Multiply Register Long by small constant
8763 8761 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
8764 8762 match(Set dst (MulL dst src));
8765 8763 effect(KILL cr, TEMP tmp);
8766 8764 ins_cost(2*100+2*400);
8767 8765 size(12);
8768 8766 // Basic idea: lo(result) = lo(src * EAX)
8769 8767 // hi(result) = hi(src * EAX) + lo(src * EDX)
8770 8768 format %{ "IMUL $tmp,EDX,$src\n\t"
8771 8769 "MOV EDX,$src\n\t"
8772 8770 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
8773 8771 "ADD EDX,$tmp" %}
8774 8772 ins_encode( long_multiply_con( dst, src, tmp ) );
8775 8773 ins_pipe( pipe_slow );
8776 8774 %}
8777 8775
8778 8776 // Integer DIV with Register
// divI_eReg: IDIV with an explicit guard for the min_jint / -1 case,
// which would otherwise raise #DE (overflow) on x86.
8779 8777 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8780 8778 match(Set rax (DivI rax div));
8781 8779 effect(KILL rdx, KILL cr);
8782 8780 size(26);
8783 8781 ins_cost(30*100+10*100);
8784 8782 format %{ "CMP EAX,0x80000000\n\t"
8785 8783 "JNE,s normal\n\t"
8786 8784 "XOR EDX,EDX\n\t"
8787 8785 "CMP ECX,-1\n\t"
8788 8786 "JE,s done\n"
8789 8787 "normal: CDQ\n\t"
8790 8788 "IDIV $div\n\t"
8791 8789 "done:" %}
8792 8790 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8793 8791 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8794 8792 ins_pipe( ialu_reg_reg_alu0 );
8795 8793 %}
8796 8794 
8797 8795 // Divide Register Long
// divL_eReg: general 64-bit division is done out-of-line via a runtime
// call (SharedRuntime::ldiv) with both operands pushed on the stack.
8798 8796 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8799 8797 match(Set dst (DivL src1 src2));
8800 8798 effect( KILL cr, KILL cx, KILL bx );
8801 8799 ins_cost(10000);
8802 8800 format %{ "PUSH $src1.hi\n\t"
8803 8801 "PUSH $src1.lo\n\t"
8804 8802 "PUSH $src2.hi\n\t"
8805 8803 "PUSH $src2.lo\n\t"
8806 8804 "CALL SharedRuntime::ldiv\n\t"
8807 8805 "ADD ESP,16" %}
8808 8806 ins_encode( long_div(src1,src2) );
8809 8807 ins_pipe( pipe_slow );
8810 8808 %}
8811 8809 
8812 8810 // Integer DIVMOD with Register, both quotient and mod results
8813 8811 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8814 8812 match(DivModI rax div);
8815 8813 effect(KILL cr);
8816 8814 size(26);
8817 8815 ins_cost(30*100+10*100);
8818 8816 format %{ "CMP EAX,0x80000000\n\t"
8819 8817 "JNE,s normal\n\t"
8820 8818 "XOR EDX,EDX\n\t"
8821 8819 "CMP ECX,-1\n\t"
8822 8820 "JE,s done\n"
8823 8821 "normal: CDQ\n\t"
8824 8822 "IDIV $div\n\t"
8825 8823 "done:" %}
8826 8824 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8827 8825 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8828 8826 ins_pipe( pipe_slow );
8829 8827 %}
8830 8828 
8831 8829 // Integer MOD with Register
// modI_eReg: remainder comes back in EDX after CDQ/IDIV.
8832 8830 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8833 8831 match(Set rdx (ModI rax div));
8834 8832 effect(KILL rax, KILL cr);
8835 8833 
8836 8834 size(26);
8837 8835 ins_cost(300);
8838 8836 format %{ "CDQ\n\t"
8839 8837 "IDIV $div" %}
8840 8838 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8841 8839 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8842 8840 ins_pipe( ialu_reg_reg_alu0 );
8843 8841 %}
8844 8842 
8845 8843 // Remainder Register Long
// modL_eReg: like divL_eReg, out-of-line via SharedRuntime::lrem.
8846 8844 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8847 8845 match(Set dst (ModL src1 src2));
8848 8846 effect( KILL cr, KILL cx, KILL bx );
8849 8847 ins_cost(10000);
8850 8848 format %{ "PUSH $src1.hi\n\t"
8851 8849 "PUSH $src1.lo\n\t"
8852 8850 "PUSH $src2.hi\n\t"
8853 8851 "PUSH $src2.lo\n\t"
8854 8852 "CALL SharedRuntime::lrem\n\t"
8855 8853 "ADD ESP,16" %}
8856 8854 ins_encode( long_mod(src1,src2) );
8857 8855 ins_pipe( pipe_slow );
8858 8856 %}
8859 8857
8860 8858 // Divide Register Long (no special case since divisor != -1)
// divL_eReg_imm32: long / 32-bit constant, done inline with unsigned
// 32-bit divisions.  Fast path when the high word is already smaller
// than |imm| (quotient fits in 32 bits); otherwise a two-step long
// division, with sign handled by negating before/after.  The exact
// statement order below is significant (flag preservation, register
// pairing) — do not reorder.
8861 8859 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8862 8860 match(Set dst (DivL dst imm));
8863 8861 effect( TEMP tmp, TEMP tmp2, KILL cr );
8864 8862 ins_cost(1000);
8865 8863 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866 8864 "XOR $tmp2,$tmp2\n\t"
8867 8865 "CMP $tmp,EDX\n\t"
8868 8866 "JA,s fast\n\t"
8869 8867 "MOV $tmp2,EAX\n\t"
8870 8868 "MOV EAX,EDX\n\t"
8871 8869 "MOV EDX,0\n\t"
8872 8870 "JLE,s pos\n\t"
8873 8871 "LNEG EAX : $tmp2\n\t"
8874 8872 "DIV $tmp # unsigned division\n\t"
8875 8873 "XCHG EAX,$tmp2\n\t"
8876 8874 "DIV $tmp\n\t"
8877 8875 "LNEG $tmp2 : EAX\n\t"
8878 8876 "JMP,s done\n"
8879 8877 "pos:\n\t"
8880 8878 "DIV $tmp\n\t"
8881 8879 "XCHG EAX,$tmp2\n"
8882 8880 "fast:\n\t"
8883 8881 "DIV $tmp\n"
8884 8882 "done:\n\t"
8885 8883 "MOV EDX,$tmp2\n\t"
8886 8884 "NEG EDX:EAX # if $imm < 0" %}
8887 8885 ins_encode %{
8888 8886 int con = (int)$imm$$constant;
8889 8887 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8890 8888 int pcon = (con > 0) ? con : -con;
8891 8889 Label Lfast, Lpos, Ldone;
8892 8890 
8893 8891 __ movl($tmp$$Register, pcon);
8894 8892 __ xorl($tmp2$$Register,$tmp2$$Register);
8895 8893 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8896 8894 __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8897 8895 
8898 8896 __ movl($tmp2$$Register, $dst$$Register); // save
8899 8897 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8900 8898 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8901 8899 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8902 8900 
8903 8901 // Negative dividend.
8904 8902 // convert value to positive to use unsigned division
8905 8903 __ lneg($dst$$Register, $tmp2$$Register);
8906 8904 __ divl($tmp$$Register);
8907 8905 __ xchgl($dst$$Register, $tmp2$$Register);
8908 8906 __ divl($tmp$$Register);
8909 8907 // revert result back to negative
8910 8908 __ lneg($tmp2$$Register, $dst$$Register);
8911 8909 __ jmpb(Ldone);
8912 8910 
8913 8911 __ bind(Lpos);
8914 8912 __ divl($tmp$$Register); // Use unsigned division
8915 8913 __ xchgl($dst$$Register, $tmp2$$Register);
8916 8914 // Fallthrow for final divide, tmp2 has 32 bit hi result
8917 8915 
8918 8916 __ bind(Lfast);
8919 8917 // fast path: src is positive
8920 8918 __ divl($tmp$$Register); // Use unsigned division
8921 8919 
8922 8920 __ bind(Ldone);
8923 8921 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8924 8922 if (con < 0) {
8925 8923 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8926 8924 }
8927 8925 %}
8928 8926 ins_pipe( pipe_slow );
8929 8927 %}
8930 8928
8931 8929 // Remainder Register Long (remainder fit into 32 bits)
8932 8930 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ // signed long remainder by a 32-bit constant; dst is pinned to EDX:EAX (DIV operands)
8933 8931 match(Set dst (ModL dst imm));
8934 8932 effect( TEMP tmp, TEMP tmp2, KILL cr );
8935 8933 ins_cost(1000);
8936 8934 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8937 8935 "CMP $tmp,EDX\n\t"
8938 8936 "JA,s fast\n\t"
8939 8937 "MOV $tmp2,EAX\n\t"
8940 8938 "MOV EAX,EDX\n\t"
8941 8939 "MOV EDX,0\n\t"
8942 8940 "JLE,s pos\n\t"
8943 8941 "LNEG EAX : $tmp2\n\t"
8944 8942 "DIV $tmp # unsigned division\n\t"
8945 8943 "MOV EAX,$tmp2\n\t"
8946 8944 "DIV $tmp\n\t"
8947 8945 "NEG EDX\n\t"
8948 8946 "JMP,s done\n"
8949 8947 "pos:\n\t"
8950 8948 "DIV $tmp\n\t"
8951 8949 "MOV EAX,$tmp2\n"
8952 8950 "fast:\n\t"
8953 8951 "DIV $tmp\n"
8954 8952 "done:\n\t"
8955 8953 "MOV EAX,EDX\n\t"
8956 8954 "SAR EDX,31\n\t" %}
8957 8955 ins_encode %{
8958 8956 int con = (int)$imm$$constant; // divisor; immL32 guarantees it fits in 32 bits
8959 8957 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); // -con below would overflow for min_jint; 0/-1 handled by other rules
8960 8958 int pcon = (con > 0) ? con : -con; // |con|: unsigned DIV needs a non-negative divisor; sign is reapplied at the end
8961 8959 Label Lfast, Lpos, Ldone;
8962 8960
8963 8961 __ movl($tmp$$Register, pcon);
8964 8962 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); // divisor vs. high word of dividend
8965 8963 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8966 8964
8967 8965 __ movl($tmp2$$Register, $dst$$Register); // save
8968 8966 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8969 8967 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8970 8968 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8971 8969
8972 8970 // Negative dividend.
8973 8971 // convert value to positive to use unsigned division
8974 8972 __ lneg($dst$$Register, $tmp2$$Register);
8975 8973 __ divl($tmp$$Register); // first step of the 64/32 schoolbook division: divide the high word
8976 8974 __ movl($dst$$Register, $tmp2$$Register);
8977 8975 __ divl($tmp$$Register); // second step: remainder of the full 64-bit division is left in EDX
8978 8976 // revert remainder back to negative
8979 8977 __ negl(HIGH_FROM_LOW($dst$$Register));
8980 8978 __ jmpb(Ldone);
8981 8979
8982 8980 __ bind(Lpos);
8983 8981 __ divl($tmp$$Register); // positive dividend: same two-step unsigned division, no sign fixup
8984 8982 __ movl($dst$$Register, $tmp2$$Register);
8985 8983
8986 8984 __ bind(Lfast);
8987 8985 // fast path: src is positive
8988 8986 __ divl($tmp$$Register);
8989 8987
8990 8988 __ bind(Ldone);
8991 8989 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // move the 32-bit remainder (EDX) into the low word (EAX)
8992 8990 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8993 8991
8994 8992 %}
8995 8993 ins_pipe( pipe_slow );
8996 8994 %}
8997 8995
8998 8996 // Integer Shift Instructions
8999 8997 // Shift Left by one
9000 8998 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9001 8999 match(Set dst (LShiftI dst shift));
9002 9000 effect(KILL cr);
9003 9001
9004 9002 size(2);
9005 9003 format %{ "SHL $dst,$shift" %}
9006 9004 opcode(0xD1, 0x4); /* D1 /4 */
9007 9005 ins_encode( OpcP, RegOpc( dst ) );
9008 9006 ins_pipe( ialu_reg );
9009 9007 %}
9010 9008
9011 9009 // Shift Left by 8-bit immediate
9012 9010 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9013 9011 match(Set dst (LShiftI dst shift));
9014 9012 effect(KILL cr);
9015 9013
9016 9014 size(3);
9017 9015 format %{ "SHL $dst,$shift" %}
9018 9016 opcode(0xC1, 0x4); /* C1 /4 ib */
9019 9017 ins_encode( RegOpcImm( dst, shift) );
9020 9018 ins_pipe( ialu_reg );
9021 9019 %}
9022 9020
9023 9021 // Shift Left by variable
9024 9022 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9025 9023 match(Set dst (LShiftI dst shift));
9026 9024 effect(KILL cr);
9027 9025
9028 9026 size(2);
9029 9027 format %{ "SHL $dst,$shift" %}
9030 9028 opcode(0xD3, 0x4); /* D3 /4 */
9031 9029 ins_encode( OpcP, RegOpc( dst ) );
9032 9030 ins_pipe( ialu_reg_reg );
9033 9031 %}
9034 9032
9035 9033 // Arithmetic shift right by one
9036 9034 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9037 9035 match(Set dst (RShiftI dst shift));
9038 9036 effect(KILL cr);
9039 9037
9040 9038 size(2);
9041 9039 format %{ "SAR $dst,$shift" %}
9042 9040 opcode(0xD1, 0x7); /* D1 /7 */
9043 9041 ins_encode( OpcP, RegOpc( dst ) );
9044 9042 ins_pipe( ialu_reg );
9045 9043 %}
9046 9044
9047 9045 // Arithmetic shift right by one
9048 9046 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
9049 9047 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9050 9048 effect(KILL cr);
9051 9049 format %{ "SAR $dst,$shift" %}
9052 9050 opcode(0xD1, 0x7); /* D1 /7 */
9053 9051 ins_encode( OpcP, RMopc_Mem(secondary,dst) );
9054 9052 ins_pipe( ialu_mem_imm );
9055 9053 %}
9056 9054
9057 9055 // Arithmetic Shift Right by 8-bit immediate
9058 9056 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ // SAR r32, imm8: arithmetic right shift of a register
9059 9057 match(Set dst (RShiftI dst shift));
9060 9058 effect(KILL cr); // shift instructions clobber the condition codes
9061 9059
9062 9060 size(3);
9063 9061 format %{ "SAR $dst,$shift" %}
9064 9062 opcode(0xC1, 0x7); /* C1 /7 ib */
9065 9063 ins_encode( RegOpcImm( dst, shift ) );
9066 9064 ins_pipe( ialu_reg ); // was ialu_mem_imm: this is a reg-imm op; matches salI_eReg_imm/shrI_eReg_imm, while sarI_mem_imm keeps the mem pipe
9067 9065 %}
9068 9066
9069 9067 // Arithmetic Shift Right by 8-bit immediate
9070 9068 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
9071 9069 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9072 9070 effect(KILL cr);
9073 9071
9074 9072 format %{ "SAR $dst,$shift" %}
9075 9073 opcode(0xC1, 0x7); /* C1 /7 ib */
9076 9074 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
9077 9075 ins_pipe( ialu_mem_imm );
9078 9076 %}
9079 9077
9080 9078 // Arithmetic Shift Right by variable
9081 9079 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9082 9080 match(Set dst (RShiftI dst shift));
9083 9081 effect(KILL cr);
9084 9082
9085 9083 size(2);
9086 9084 format %{ "SAR $dst,$shift" %}
9087 9085 opcode(0xD3, 0x7); /* D3 /7 */
9088 9086 ins_encode( OpcP, RegOpc( dst ) );
9089 9087 ins_pipe( ialu_reg_reg );
9090 9088 %}
9091 9089
9092 9090 // Logical shift right by one
9093 9091 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9094 9092 match(Set dst (URShiftI dst shift));
9095 9093 effect(KILL cr);
9096 9094
9097 9095 size(2);
9098 9096 format %{ "SHR $dst,$shift" %}
9099 9097 opcode(0xD1, 0x5); /* D1 /5 */
9100 9098 ins_encode( OpcP, RegOpc( dst ) );
9101 9099 ins_pipe( ialu_reg );
9102 9100 %}
9103 9101
9104 9102 // Logical Shift Right by 8-bit immediate
9105 9103 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9106 9104 match(Set dst (URShiftI dst shift));
9107 9105 effect(KILL cr);
9108 9106
9109 9107 size(3);
9110 9108 format %{ "SHR $dst,$shift" %}
9111 9109 opcode(0xC1, 0x5); /* C1 /5 ib */
9112 9110 ins_encode( RegOpcImm( dst, shift) );
9113 9111 ins_pipe( ialu_reg );
9114 9112 %}
9115 9113
9116 9114
9117 9115 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
9118 9116 // This idiom is used by the compiler for the i2b bytecode.
9119 9117 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
9120 9118 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); // (x << 24) >> 24: sign-extend the low byte
9121 9119
9122 9120 size(3);
9123 9121 format %{ "MOVSX $dst,$src :8" %}
9124 9122 ins_encode %{
9125 9123 __ movsbl($dst$$Register, $src$$Register); // one MOVSX replaces the two-shift idiom; NOTE(review): xRegI presumably restricts src to byte-addressable registers — confirm operand class definition
9126 9124 %}
9127 9125 ins_pipe(ialu_reg_reg);
9128 9126 %}
9129 9127
9130 9128 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
9131 9129 // This idiom is used by the compiler for the i2s bytecode.
9132 9130 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
9133 9131 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); // (x << 16) >> 16: sign-extend the low 16 bits
9134 9132
9135 9133 size(3);
9136 9134 format %{ "MOVSX $dst,$src :16" %}
9137 9135 ins_encode %{
9138 9136 __ movswl($dst$$Register, $src$$Register); // one MOVSX r32,r16 replaces the two-shift idiom
9139 9137 %}
9140 9138 ins_pipe(ialu_reg_reg);
9141 9139 %}
9142 9140
9143 9141
9144 9142 // Logical Shift Right by variable
9145 9143 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9146 9144 match(Set dst (URShiftI dst shift));
9147 9145 effect(KILL cr);
9148 9146
9149 9147 size(2);
9150 9148 format %{ "SHR $dst,$shift" %}
9151 9149 opcode(0xD3, 0x5); /* D3 /5 */
9152 9150 ins_encode( OpcP, RegOpc( dst ) );
9153 9151 ins_pipe( ialu_reg_reg );
9154 9152 %}
9155 9153
9156 9154
9157 9155 //----------Logical Instructions-----------------------------------------------
9158 9156 //----------Integer Logical Instructions---------------------------------------
9159 9157 // And Instructions
9160 9158 // And Register with Register
9161 9159 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9162 9160 match(Set dst (AndI dst src));
9163 9161 effect(KILL cr);
9164 9162
9165 9163 size(2);
9166 9164 format %{ "AND $dst,$src" %}
9167 9165 opcode(0x23);
9168 9166 ins_encode( OpcP, RegReg( dst, src) );
9169 9167 ins_pipe( ialu_reg_reg );
9170 9168 %}
9171 9169
9172 9170 // And Register with Immediate
9173 9171 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9174 9172 match(Set dst (AndI dst src));
9175 9173 effect(KILL cr);
9176 9174
9177 9175 format %{ "AND $dst,$src" %}
9178 9176 opcode(0x81,0x04); /* Opcode 81 /4 */
9179 9177 // ins_encode( RegImm( dst, src) );
9180 9178 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9181 9179 ins_pipe( ialu_reg );
9182 9180 %}
9183 9181
9184 9182 // And Register with Memory
9185 9183 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9186 9184 match(Set dst (AndI dst (LoadI src)));
9187 9185 effect(KILL cr);
9188 9186
9189 9187 ins_cost(125);
9190 9188 format %{ "AND $dst,$src" %}
9191 9189 opcode(0x23);
9192 9190 ins_encode( OpcP, RegMem( dst, src) );
9193 9191 ins_pipe( ialu_reg_mem );
9194 9192 %}
9195 9193
9196 9194 // And Memory with Register
9197 9195 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9198 9196 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9199 9197 effect(KILL cr);
9200 9198
9201 9199 ins_cost(150);
9202 9200 format %{ "AND $dst,$src" %}
9203 9201 opcode(0x21); /* Opcode 21 /r */
9204 9202 ins_encode( OpcP, RegMem( src, dst ) );
9205 9203 ins_pipe( ialu_mem_reg );
9206 9204 %}
9207 9205
9208 9206 // And Memory with Immediate
9209 9207 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9210 9208 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9211 9209 effect(KILL cr);
9212 9210
9213 9211 ins_cost(125);
9214 9212 format %{ "AND $dst,$src" %}
9215 9213 opcode(0x81, 0x4); /* Opcode 81 /4 id */
9216 9214 // ins_encode( MemImm( dst, src) );
9217 9215 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9218 9216 ins_pipe( ialu_mem_imm );
9219 9217 %}
9220 9218
9221 9219 // Or Instructions
9222 9220 // Or Register with Register
9223 9221 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9224 9222 match(Set dst (OrI dst src));
9225 9223 effect(KILL cr);
9226 9224
9227 9225 size(2);
9228 9226 format %{ "OR $dst,$src" %}
9229 9227 opcode(0x0B);
9230 9228 ins_encode( OpcP, RegReg( dst, src) );
9231 9229 ins_pipe( ialu_reg_reg );
9232 9230 %}
9233 9231
9234 9232 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
9235 9233 match(Set dst (OrI dst (CastP2X src)));
9236 9234 effect(KILL cr);
9237 9235
9238 9236 size(2);
9239 9237 format %{ "OR $dst,$src" %}
9240 9238 opcode(0x0B);
9241 9239 ins_encode( OpcP, RegReg( dst, src) );
9242 9240 ins_pipe( ialu_reg_reg );
9243 9241 %}
9244 9242
9245 9243
9246 9244 // Or Register with Immediate
9247 9245 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9248 9246 match(Set dst (OrI dst src));
9249 9247 effect(KILL cr);
9250 9248
9251 9249 format %{ "OR $dst,$src" %}
9252 9250 opcode(0x81,0x01); /* Opcode 81 /1 id */
9253 9251 // ins_encode( RegImm( dst, src) );
9254 9252 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9255 9253 ins_pipe( ialu_reg );
9256 9254 %}
9257 9255
9258 9256 // Or Register with Memory
9259 9257 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9260 9258 match(Set dst (OrI dst (LoadI src)));
9261 9259 effect(KILL cr);
9262 9260
9263 9261 ins_cost(125);
9264 9262 format %{ "OR $dst,$src" %}
9265 9263 opcode(0x0B);
9266 9264 ins_encode( OpcP, RegMem( dst, src) );
9267 9265 ins_pipe( ialu_reg_mem );
9268 9266 %}
9269 9267
9270 9268 // Or Memory with Register
9271 9269 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9272 9270 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9273 9271 effect(KILL cr);
9274 9272
9275 9273 ins_cost(150);
9276 9274 format %{ "OR $dst,$src" %}
9277 9275 opcode(0x09); /* Opcode 09 /r */
9278 9276 ins_encode( OpcP, RegMem( src, dst ) );
9279 9277 ins_pipe( ialu_mem_reg );
9280 9278 %}
9281 9279
9282 9280 // Or Memory with Immediate
9283 9281 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9284 9282 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9285 9283 effect(KILL cr);
9286 9284
9287 9285 ins_cost(125);
9288 9286 format %{ "OR $dst,$src" %}
9289 9287 opcode(0x81,0x1); /* Opcode 81 /1 id */
9290 9288 // ins_encode( MemImm( dst, src) );
9291 9289 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9292 9290 ins_pipe( ialu_mem_imm );
9293 9291 %}
9294 9292
9295 9293 // ROL/ROR
9296 9294 // ROL expand
9297 9295 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9298 9296 effect(USE_DEF dst, USE shift, KILL cr);
9299 9297
9300 9298 format %{ "ROL $dst, $shift" %}
9301 9299 opcode(0xD1, 0x0); /* Opcode D1 /0 */
9302 9300 ins_encode( OpcP, RegOpc( dst ));
9303 9301 ins_pipe( ialu_reg );
9304 9302 %}
9305 9303
9306 9304 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9307 9305 effect(USE_DEF dst, USE shift, KILL cr);
9308 9306
9309 9307 format %{ "ROL $dst, $shift" %}
9310 9308 opcode(0xC1, 0x0); /*Opcode /C1 /0 */
9311 9309 ins_encode( RegOpcImm(dst, shift) );
9312 9310 ins_pipe(ialu_reg);
9313 9311 %}
9314 9312
9315 9313 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
9316 9314 effect(USE_DEF dst, USE shift, KILL cr);
9317 9315
9318 9316 format %{ "ROL $dst, $shift" %}
9319 9317 opcode(0xD3, 0x0); /* Opcode D3 /0 */
9320 9318 ins_encode(OpcP, RegOpc(dst));
9321 9319 ins_pipe( ialu_reg_reg );
9322 9320 %}
9323 9321 // end of ROL expand
9324 9322
9325 9323 // ROL 32bit by one once
9326 9324 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
9327 9325 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9328 9326
9329 9327 expand %{
9330 9328 rolI_eReg_imm1(dst, lshift, cr);
9331 9329 %}
9332 9330 %}
9333 9331
9334 9332 // ROL 32bit var by imm8 once
9335 9333 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
9336 9334 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // lshift + rshift must be 0 mod 32: only then is (x<<l)|(x>>>r) a rotate
9337 9335 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9338 9336
9339 9337 expand %{
9340 9338 rolI_eReg_imm8(dst, lshift, cr); // rotate left by the left-shift amount
9341 9339 %}
9342 9340 %}
9343 9341
9344 9342 // ROL 32bit var by var once
9345 9343 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9346 9344 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9347 9345
9348 9346 expand %{
9349 9347 rolI_eReg_CL(dst, shift, cr);
9350 9348 %}
9351 9349 %}
9352 9350
9353 9351 // ROL 32bit var by var once
9354 9352 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9355 9353 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9356 9354
9357 9355 expand %{
9358 9356 rolI_eReg_CL(dst, shift, cr);
9359 9357 %}
9360 9358 %}
9361 9359
9362 9360 // ROR expand
9363 9361 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9364 9362 effect(USE_DEF dst, USE shift, KILL cr);
9365 9363
9366 9364 format %{ "ROR $dst, $shift" %}
9367 9365 opcode(0xD1,0x1); /* Opcode D1 /1 */
9368 9366 ins_encode( OpcP, RegOpc( dst ) );
9369 9367 ins_pipe( ialu_reg );
9370 9368 %}
9371 9369
9372 9370 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9373 9371 effect (USE_DEF dst, USE shift, KILL cr);
9374 9372
9375 9373 format %{ "ROR $dst, $shift" %}
9376 9374 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
9377 9375 ins_encode( RegOpcImm(dst, shift) );
9378 9376 ins_pipe( ialu_reg );
9379 9377 %}
9380 9378
9381 9379 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
9382 9380 effect(USE_DEF dst, USE shift, KILL cr);
9383 9381
9384 9382 format %{ "ROR $dst, $shift" %}
9385 9383 opcode(0xD3, 0x1); /* Opcode D3 /1 */
9386 9384 ins_encode(OpcP, RegOpc(dst));
9387 9385 ins_pipe( ialu_reg_reg );
9388 9386 %}
9389 9387 // end of ROR expand
9390 9388
9391 9389 // ROR right once
9392 9390 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
9393 9391 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9394 9392
9395 9393 expand %{
9396 9394 rorI_eReg_imm1(dst, rshift, cr);
9397 9395 %}
9398 9396 %}
9399 9397
9400 9398 // ROR 32bit by immI8 once
9401 9399 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9402 9400 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // rshift + lshift must be 0 mod 32: only then is (x>>>r)|(x<<l) a rotate
9403 9401 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9404 9402
9405 9403 expand %{
9406 9404 rorI_eReg_imm8(dst, rshift, cr); // rotate right by the right-shift amount
9407 9405 %}
9408 9406 %}
9409 9407
9410 9408 // ROR 32bit var by var once
9411 9409 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9412 9410 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9413 9411
9414 9412 expand %{
9415 9413 rorI_eReg_CL(dst, shift, cr);
9416 9414 %}
9417 9415 %}
9418 9416
9419 9417 // ROR 32bit var by var once
9420 9418 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9421 9419 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9422 9420
9423 9421 expand %{
9424 9422 rorI_eReg_CL(dst, shift, cr);
9425 9423 %}
9426 9424 %}
9427 9425
9428 9426 // Xor Instructions
9429 9427 // Xor Register with Register
9430 9428 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9431 9429 match(Set dst (XorI dst src));
9432 9430 effect(KILL cr);
9433 9431
9434 9432 size(2);
9435 9433 format %{ "XOR $dst,$src" %}
9436 9434 opcode(0x33);
9437 9435 ins_encode( OpcP, RegReg( dst, src) );
9438 9436 ins_pipe( ialu_reg_reg );
9439 9437 %}
9440 9438
9441 9439 // Xor Register with Immediate -1
9442 9440 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9443 9441 match(Set dst (XorI dst imm)); // x ^ -1 == ~x
9444 9442
9445 9443 size(2);
9446 9444 format %{ "NOT $dst" %}
9447 9445 ins_encode %{
9448 9446 __ notl($dst$$Register); // NOT leaves the flags untouched, hence no eFlagsReg/KILL cr in this rule
9449 9447 %}
9450 9448 ins_pipe( ialu_reg );
9451 9449 %}
9452 9450
9453 9451 // Xor Register with Immediate
9454 9452 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9455 9453 match(Set dst (XorI dst src));
9456 9454 effect(KILL cr);
9457 9455
9458 9456 format %{ "XOR $dst,$src" %}
9459 9457 opcode(0x81,0x06); /* Opcode 81 /6 id */
9460 9458 // ins_encode( RegImm( dst, src) );
9461 9459 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9462 9460 ins_pipe( ialu_reg );
9463 9461 %}
9464 9462
9465 9463 // Xor Register with Memory
9466 9464 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9467 9465 match(Set dst (XorI dst (LoadI src)));
9468 9466 effect(KILL cr);
9469 9467
9470 9468 ins_cost(125);
9471 9469 format %{ "XOR $dst,$src" %}
9472 9470 opcode(0x33);
9473 9471 ins_encode( OpcP, RegMem(dst, src) );
9474 9472 ins_pipe( ialu_reg_mem );
9475 9473 %}
9476 9474
9477 9475 // Xor Memory with Register
9478 9476 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9479 9477 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9480 9478 effect(KILL cr);
9481 9479
9482 9480 ins_cost(150);
9483 9481 format %{ "XOR $dst,$src" %}
9484 9482 opcode(0x31); /* Opcode 31 /r */
9485 9483 ins_encode( OpcP, RegMem( src, dst ) );
9486 9484 ins_pipe( ialu_mem_reg );
9487 9485 %}
9488 9486
9489 9487 // Xor Memory with Immediate
9490 9488 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9491 9489 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9492 9490 effect(KILL cr);
9493 9491
9494 9492 ins_cost(125);
9495 9493 format %{ "XOR $dst,$src" %}
9496 9494 opcode(0x81,0x6); /* Opcode 81 /6 id */
9497 9495 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9498 9496 ins_pipe( ialu_mem_imm );
9499 9497 %}
9500 9498
9501 9499 //----------Convert Int to Boolean---------------------------------------------
9502 9500
9503 9501 instruct movI_nocopy(eRegI dst, eRegI src) %{
9504 9502 effect( DEF dst, USE src );
9505 9503 format %{ "MOV $dst,$src" %}
9506 9504 ins_encode( enc_Copy( dst, src) );
9507 9505 ins_pipe( ialu_reg_reg );
9508 9506 %}
9509 9507
9510 9508 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9511 9509 effect( USE_DEF dst, USE src, KILL cr );
9512 9510
9513 9511 size(4);
9514 9512 format %{ "NEG $dst\n\t"
9515 9513 "ADC $dst,$src" %}
9516 9514 ins_encode( neg_reg(dst),
9517 9515 OpcRegReg(0x13,dst,src) );
9518 9516 ins_pipe( ialu_reg_reg_long );
9519 9517 %}
9520 9518
9521 9519 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9522 9520 match(Set dst (Conv2B src));
9523 9521
9524 9522 expand %{
9525 9523 movI_nocopy(dst,src);
9526 9524 ci2b(dst,src,cr);
9527 9525 %}
9528 9526 %}
9529 9527
9530 9528 instruct movP_nocopy(eRegI dst, eRegP src) %{
9531 9529 effect( DEF dst, USE src );
9532 9530 format %{ "MOV $dst,$src" %}
9533 9531 ins_encode( enc_Copy( dst, src) );
9534 9532 ins_pipe( ialu_reg_reg );
9535 9533 %}
9536 9534
9537 9535 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9538 9536 effect( USE_DEF dst, USE src, KILL cr );
9539 9537 format %{ "NEG $dst\n\t"
9540 9538 "ADC $dst,$src" %}
9541 9539 ins_encode( neg_reg(dst),
9542 9540 OpcRegReg(0x13,dst,src) );
9543 9541 ins_pipe( ialu_reg_reg_long );
9544 9542 %}
9545 9543
9546 9544 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9547 9545 match(Set dst (Conv2B src));
9548 9546
9549 9547 expand %{
9550 9548 movP_nocopy(dst,src);
9551 9549 cp2b(dst,src,cr);
9552 9550 %}
9553 9551 %}
9554 9552
9555 9553 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9556 9554 match(Set dst (CmpLTMask p q)); // dst = (p < q) ? -1 : 0 — branch-free all-ones mask
9557 9555 effect( KILL cr );
9558 9556 ins_cost(400);
9559 9557
9560 9558 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9561 9559 format %{ "XOR $dst,$dst\n\t"
9562 9560 "CMP $p,$q\n\t"
9563 9561 "SETlt $dst\n\t"
9564 9562 "NEG $dst" %}
9565 9563 ins_encode( OpcRegReg(0x33,dst,dst),
9566 9564 OpcRegReg(0x3B,p,q),
9567 9565 setLT_reg(dst), neg_reg(dst) ); // SETlt yields 0/1; NEG turns that into 0/-1
9568 9566 ins_pipe( pipe_slow );
9569 9567 %}
9570 9568
9571 9569 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9572 9570 match(Set dst (CmpLTMask dst zero)); // compare against 0: the sign bit alone decides the mask
9573 9571 effect( DEF dst, KILL cr );
9574 9572 ins_cost(100);
9575 9573
9576 9574 format %{ "SAR $dst,31" %}
9577 9575 opcode(0xC1, 0x7); /* C1 /7 ib */
9578 9576 ins_encode( RegOpcImm( dst, 0x1F ) ); // arithmetic shift by 31 smears the sign bit across the whole word (0 or -1)
9579 9577 ins_pipe( ialu_reg );
9580 9578 %}
9581 9579
9582 9580
9583 9581 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9584 9582 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); // p = (p - q) + ((p < q) ? y : 0), computed branch-free
9585 9583 effect( KILL tmp, KILL cr );
9586 9584 ins_cost(400);
9587 9585 // annoyingly, $tmp has no edges so you can't ask for it in
9588 9586 // any format or encoding
9589 9587 format %{ "SUB $p,$q\n\t"
9590 9588 "SBB ECX,ECX\n\t"
9591 9589 "AND ECX,$y\n\t"
9592 9590 "ADD $p,ECX" %}
9593 9591 ins_encode( enc_cmpLTP(p,q,y,tmp) ); // SBB ECX,ECX turns the borrow from SUB into a 0/-1 mask in ECX
9594 9592 ins_pipe( pipe_cmplt );
9595 9593 %}
9596 9594
9597 9595 /* If I enable this, I encourage spilling in the inner loop of compress.
9598 9596 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9599 9597 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9600 9598 effect( USE_KILL tmp, KILL cr );
9601 9599 ins_cost(400);
9602 9600
9603 9601 format %{ "SUB $p,$q\n\t"
9604 9602 "SBB ECX,ECX\n\t"
9605 9603 "AND ECX,$y\n\t"
9606 9604 "ADD $p,ECX" %}
9607 9605 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9608 9606 %}
9609 9607 */
9610 9608
9611 9609 //----------Long Instructions------------------------------------------------
9612 9610 // Add Long Register with Register
9613 9611 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9614 9612 match(Set dst (AddL dst src));
9615 9613 effect(KILL cr);
9616 9614 ins_cost(200);
9617 9615 format %{ "ADD $dst.lo,$src.lo\n\t"
9618 9616 "ADC $dst.hi,$src.hi" %}
9619 9617 opcode(0x03, 0x13);
9620 9618 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9621 9619 ins_pipe( ialu_reg_reg_long );
9622 9620 %}
9623 9621
9624 9622 // Add Long Register with Immediate
9625 9623 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9626 9624 match(Set dst (AddL dst src));
9627 9625 effect(KILL cr);
9628 9626 format %{ "ADD $dst.lo,$src.lo\n\t"
9629 9627 "ADC $dst.hi,$src.hi" %}
9630 9628 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9631 9629 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9632 9630 ins_pipe( ialu_reg_long );
9633 9631 %}
9634 9632
9635 9633 // Add Long Register with Memory
9636 9634 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9637 9635 match(Set dst (AddL dst (LoadL mem)));
9638 9636 effect(KILL cr);
9639 9637 ins_cost(125);
9640 9638 format %{ "ADD $dst.lo,$mem\n\t"
9641 9639 "ADC $dst.hi,$mem+4" %}
9642 9640 opcode(0x03, 0x13);
9643 9641 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9644 9642 ins_pipe( ialu_reg_long_mem );
9645 9643 %}
9646 9644
9647 9645 // Subtract Long Register with Register.
9648 9646 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9649 9647 match(Set dst (SubL dst src));
9650 9648 effect(KILL cr);
9651 9649 ins_cost(200);
9652 9650 format %{ "SUB $dst.lo,$src.lo\n\t"
9653 9651 "SBB $dst.hi,$src.hi" %}
9654 9652 opcode(0x2B, 0x1B);
9655 9653 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9656 9654 ins_pipe( ialu_reg_reg_long );
9657 9655 %}
9658 9656
9659 9657 // Subtract Long Register with Immediate
9660 9658 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9661 9659 match(Set dst (SubL dst src));
9662 9660 effect(KILL cr);
9663 9661 format %{ "SUB $dst.lo,$src.lo\n\t"
9664 9662 "SBB $dst.hi,$src.hi" %}
9665 9663 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9666 9664 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9667 9665 ins_pipe( ialu_reg_long );
9668 9666 %}
9669 9667
9670 9668 // Subtract Long Register with Memory
9671 9669 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9672 9670 match(Set dst (SubL dst (LoadL mem)));
9673 9671 effect(KILL cr);
9674 9672 ins_cost(125);
9675 9673 format %{ "SUB $dst.lo,$mem\n\t"
9676 9674 "SBB $dst.hi,$mem+4" %}
9677 9675 opcode(0x2B, 0x1B);
9678 9676 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9679 9677 ins_pipe( ialu_reg_long_mem );
9680 9678 %}
9681 9679
9682 9680 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9683 9681 match(Set dst (SubL zero dst));
9684 9682 effect(KILL cr);
9685 9683 ins_cost(300);
9686 9684 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9687 9685 ins_encode( neg_long(dst) );
9688 9686 ins_pipe( ialu_reg_reg_long );
9689 9687 %}
9690 9688
9691 9689 // And Long Register with Register
9692 9690 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9693 9691 match(Set dst (AndL dst src));
9694 9692 effect(KILL cr);
9695 9693 format %{ "AND $dst.lo,$src.lo\n\t"
9696 9694 "AND $dst.hi,$src.hi" %}
9697 9695 opcode(0x23,0x23);
9698 9696 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9699 9697 ins_pipe( ialu_reg_reg_long );
9700 9698 %}
9701 9699
9702 9700 // And Long Register with Immediate
9703 9701 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9704 9702 match(Set dst (AndL dst src));
9705 9703 effect(KILL cr);
9706 9704 format %{ "AND $dst.lo,$src.lo\n\t"
9707 9705 "AND $dst.hi,$src.hi" %}
9708 9706 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9709 9707 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9710 9708 ins_pipe( ialu_reg_long );
9711 9709 %}
9712 9710
9713 9711 // And Long Register with Memory
9714 9712 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9715 9713 match(Set dst (AndL dst (LoadL mem)));
9716 9714 effect(KILL cr);
9717 9715 ins_cost(125);
9718 9716 format %{ "AND $dst.lo,$mem\n\t"
9719 9717 "AND $dst.hi,$mem+4" %}
9720 9718 opcode(0x23, 0x23);
9721 9719 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9722 9720 ins_pipe( ialu_reg_long_mem );
9723 9721 %}
9724 9722
9725 9723 // Or Long Register with Register
9726 9724 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9727 9725 match(Set dst (OrL dst src));
9728 9726 effect(KILL cr);
9729 9727 format %{ "OR $dst.lo,$src.lo\n\t"
9730 9728 "OR $dst.hi,$src.hi" %}
9731 9729 opcode(0x0B,0x0B);
9732 9730 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9733 9731 ins_pipe( ialu_reg_reg_long );
9734 9732 %}
9735 9733
9736 9734 // Or Long Register with Immediate
9737 9735 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9738 9736 match(Set dst (OrL dst src));
9739 9737 effect(KILL cr);
9740 9738 format %{ "OR $dst.lo,$src.lo\n\t"
9741 9739 "OR $dst.hi,$src.hi" %}
9742 9740 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9743 9741 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9744 9742 ins_pipe( ialu_reg_long );
9745 9743 %}
9746 9744
9747 9745 // Or Long Register with Memory
9748 9746 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9749 9747 match(Set dst (OrL dst (LoadL mem)));
9750 9748 effect(KILL cr);
9751 9749 ins_cost(125);
9752 9750 format %{ "OR $dst.lo,$mem\n\t"
9753 9751 "OR $dst.hi,$mem+4" %}
9754 9752 opcode(0x0B,0x0B);
9755 9753 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9756 9754 ins_pipe( ialu_reg_long_mem );
9757 9755 %}
9758 9756
9759 9757 // Xor Long Register with Register
9760 9758 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9761 9759 match(Set dst (XorL dst src));
9762 9760 effect(KILL cr);
9763 9761 format %{ "XOR $dst.lo,$src.lo\n\t"
9764 9762 "XOR $dst.hi,$src.hi" %}
9765 9763 opcode(0x33,0x33);
9766 9764 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9767 9765 ins_pipe( ialu_reg_reg_long );
9768 9766 %}
9769 9767
9770 9768 // Xor Long Register with Immediate -1
9771 9769 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9772 9770 match(Set dst (XorL dst imm)); // x ^ -1L == ~x, done per 32-bit half
9773 9771 format %{ "NOT $dst.lo\n\t"
9774 9772 "NOT $dst.hi" %}
9775 9773 ins_encode %{
9776 9774 __ notl($dst$$Register); // low 32 bits
9777 9775 __ notl(HIGH_FROM_LOW($dst$$Register)); // high 32 bits; NOT leaves the flags alone, so no KILL cr
9778 9776 %}
9779 9777 ins_pipe( ialu_reg_long );
9780 9778 %}
9781 9779
9782 9780 // Xor Long Register with Immediate
9783 9781 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9784 9782 match(Set dst (XorL dst src));
9785 9783 effect(KILL cr);
9786 9784 format %{ "XOR $dst.lo,$src.lo\n\t"
9787 9785 "XOR $dst.hi,$src.hi" %}
9788 9786 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9789 9787 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9790 9788 ins_pipe( ialu_reg_long );
9791 9789 %}
9792 9790
9793 9791 // Xor Long Register with Memory
9794 9792 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9795 9793 match(Set dst (XorL dst (LoadL mem)));
9796 9794 effect(KILL cr);
9797 9795 ins_cost(125);
9798 9796 format %{ "XOR $dst.lo,$mem\n\t"
9799 9797 "XOR $dst.hi,$mem+4" %}
9800 9798 opcode(0x33,0x33);
9801 9799 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9802 9800 ins_pipe( ialu_reg_long_mem );
9803 9801 %}
9804 9802
9805 9803 // Shift Left Long by 1
9806 9804 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9807 9805 predicate(UseNewLongLShift); // only when the VM flag enables the ADD/ADC long-shift variants
9808 9806 match(Set dst (LShiftL dst cnt));
9809 9807 effect(KILL cr);
9810 9808 ins_cost(100);
9811 9809 format %{ "ADD $dst.lo,$dst.lo\n\t"
9812 9810 "ADC $dst.hi,$dst.hi" %}
9813 9811 ins_encode %{
9814 9812 __ addl($dst$$Register,$dst$$Register); // lo += lo; carry out of bit 31 ...
9815 9813 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); // ... is added into hi: a 64-bit shift-left-by-1
9816 9814 %}
9817 9815 ins_pipe( ialu_reg_long );
9818 9816 %}
9819 9817
9820 9818 // Shift Left Long by 2
9821 9819 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9822 9820 predicate(UseNewLongLShift);
9823 9821 match(Set dst (LShiftL dst cnt));
9824 9822 effect(KILL cr);
9825 9823 ins_cost(100);
9826 9824 format %{ "ADD $dst.lo,$dst.lo\n\t"
9827 9825 "ADC $dst.hi,$dst.hi\n\t"
9828 9826 "ADD $dst.lo,$dst.lo\n\t"
9829 9827 "ADC $dst.hi,$dst.hi" %}
9830 9828 ins_encode %{
9831 9829 __ addl($dst$$Register,$dst$$Register);
9832 9830 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9833 9831 __ addl($dst$$Register,$dst$$Register);
9834 9832 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9835 9833 %}
9836 9834 ins_pipe( ialu_reg_long );
9837 9835 %}
9838 9836
9839 9837 // Shift Left Long by 3
9840 9838 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9841 9839 predicate(UseNewLongLShift);
9842 9840 match(Set dst (LShiftL dst cnt));
9843 9841 effect(KILL cr);
9844 9842 ins_cost(100);
9845 9843 format %{ "ADD $dst.lo,$dst.lo\n\t"
9846 9844 "ADC $dst.hi,$dst.hi\n\t"
9847 9845 "ADD $dst.lo,$dst.lo\n\t"
9848 9846 "ADC $dst.hi,$dst.hi\n\t"
9849 9847 "ADD $dst.lo,$dst.lo\n\t"
9850 9848 "ADC $dst.hi,$dst.hi" %}
9851 9849 ins_encode %{
9852 9850 __ addl($dst$$Register,$dst$$Register);
9853 9851 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9854 9852 __ addl($dst$$Register,$dst$$Register);
9855 9853 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9856 9854 __ addl($dst$$Register,$dst$$Register);
9857 9855 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9858 9856 %}
9859 9857 ins_pipe( ialu_reg_long );
9860 9858 %}
9861 9859
9862 9860 // Shift Left Long by 1-31
9863 9861 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9864 9862 match(Set dst (LShiftL dst cnt));
9865 9863 effect(KILL cr);
9866 9864 ins_cost(200);
9867 9865 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9868 9866 "SHL $dst.lo,$cnt" %}
9869 9867 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9870 9868 ins_encode( move_long_small_shift(dst,cnt) );
9871 9869 ins_pipe( ialu_reg_long );
9872 9870 %}
9873 9871
9874 9872 // Shift Left Long by 32-63
9875 9873 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9876 9874 match(Set dst (LShiftL dst cnt));
9877 9875 effect(KILL cr);
9878 9876 ins_cost(300);
9879 9877 format %{ "MOV $dst.hi,$dst.lo\n"
9880 9878 "\tSHL $dst.hi,$cnt-32\n"
9881 9879 "\tXOR $dst.lo,$dst.lo" %}
9882 9880 opcode(0xC1, 0x4); /* C1 /4 ib */
9883 9881 ins_encode( move_long_big_shift_clr(dst,cnt) );
9884 9882 ins_pipe( ialu_reg_long );
9885 9883 %}
9886 9884
9887 9885 // Shift Left Long by variable
9888 9886 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9889 9887 match(Set dst (LShiftL dst shift));
9890 9888 effect(KILL cr);
9891 9889 ins_cost(500+200);
9892 9890 size(17);
9893 9891 format %{ "TEST $shift,32\n\t"
9894 9892 "JEQ,s small\n\t"
9895 9893 "MOV $dst.hi,$dst.lo\n\t"
9896 9894 "XOR $dst.lo,$dst.lo\n"
9897 9895 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
9898 9896 "SHL $dst.lo,$shift" %}
9899 9897 ins_encode( shift_left_long( dst, shift ) );
9900 9898 ins_pipe( pipe_slow );
9901 9899 %}
9902 9900
9903 9901 // Shift Right Long by 1-31
9904 9902 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9905 9903 match(Set dst (URShiftL dst cnt));
9906 9904 effect(KILL cr);
9907 9905 ins_cost(200);
9908 9906 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9909 9907 "SHR $dst.hi,$cnt" %}
9910 9908 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9911 9909 ins_encode( move_long_small_shift(dst,cnt) );
9912 9910 ins_pipe( ialu_reg_long );
9913 9911 %}
9914 9912
9915 9913 // Shift Right Long by 32-63
9916 9914 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9917 9915 match(Set dst (URShiftL dst cnt));
9918 9916 effect(KILL cr);
9919 9917 ins_cost(300);
9920 9918 format %{ "MOV $dst.lo,$dst.hi\n"
9921 9919 "\tSHR $dst.lo,$cnt-32\n"
9922 9920 "\tXOR $dst.hi,$dst.hi" %}
9923 9921 opcode(0xC1, 0x5); /* C1 /5 ib */
9924 9922 ins_encode( move_long_big_shift_clr(dst,cnt) );
9925 9923 ins_pipe( ialu_reg_long );
9926 9924 %}
9927 9925
9928 9926 // Shift Right Long by variable
9929 9927 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9930 9928 match(Set dst (URShiftL dst shift));
9931 9929 effect(KILL cr);
9932 9930 ins_cost(600);
9933 9931 size(17);
9934 9932 format %{ "TEST $shift,32\n\t"
9935 9933 "JEQ,s small\n\t"
9936 9934 "MOV $dst.lo,$dst.hi\n\t"
9937 9935 "XOR $dst.hi,$dst.hi\n"
9938 9936 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9939 9937 "SHR $dst.hi,$shift" %}
9940 9938 ins_encode( shift_right_long( dst, shift ) );
9941 9939 ins_pipe( pipe_slow );
9942 9940 %}
9943 9941
9944 9942 // Shift Right Long by 1-31
9945 9943 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9946 9944 match(Set dst (RShiftL dst cnt));
9947 9945 effect(KILL cr);
9948 9946 ins_cost(200);
9949 9947 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9950 9948 "SAR $dst.hi,$cnt" %}
9951 9949 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9952 9950 ins_encode( move_long_small_shift(dst,cnt) );
9953 9951 ins_pipe( ialu_reg_long );
9954 9952 %}
9955 9953
9956 9954 // Shift Right Long by 32-63
9957 9955 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9958 9956 match(Set dst (RShiftL dst cnt));
9959 9957 effect(KILL cr);
9960 9958 ins_cost(300);
9961 9959 format %{ "MOV $dst.lo,$dst.hi\n"
9962 9960 "\tSAR $dst.lo,$cnt-32\n"
9963 9961 "\tSAR $dst.hi,31" %}
9964 9962 opcode(0xC1, 0x7); /* C1 /7 ib */
9965 9963 ins_encode( move_long_big_shift_sign(dst,cnt) );
9966 9964 ins_pipe( ialu_reg_long );
9967 9965 %}
9968 9966
9969 9967 // Shift Right arithmetic Long by variable
9970 9968 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9971 9969 match(Set dst (RShiftL dst shift));
9972 9970 effect(KILL cr);
9973 9971 ins_cost(600);
9974 9972 size(18);
9975 9973 format %{ "TEST $shift,32\n\t"
9976 9974 "JEQ,s small\n\t"
9977 9975 "MOV $dst.lo,$dst.hi\n\t"
9978 9976 "SAR $dst.hi,31\n"
9979 9977 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9980 9978 "SAR $dst.hi,$shift" %}
9981 9979 ins_encode( shift_right_arith_long( dst, shift ) );
9982 9980 ins_pipe( pipe_slow );
9983 9981 %}
9984 9982
9985 9983
9986 9984 //----------Double Instructions------------------------------------------------
9987 9985 // Double Math
9988 9986
9989 9987 // Compare & branch
9990 9988
9991 9989 // P6 version of float compare, sets condition codes in EFLAGS
9992 9990 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9993 9991 predicate(VM_Version::supports_cmov() && UseSSE <=1);
9994 9992 match(Set cr (CmpD src1 src2));
9995 9993 effect(KILL rax);
9996 9994 ins_cost(150);
9997 9995 format %{ "FLD $src1\n\t"
9998 9996 "FUCOMIP ST,$src2 // P6 instruction\n\t"
9999 9997 "JNP exit\n\t"
10000 9998 "MOV ah,1 // saw a NaN, set CF\n\t"
10001 9999 "SAHF\n"
10002 10000 "exit:\tNOP // avoid branch to branch" %}
10003 10001 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10004 10002 ins_encode( Push_Reg_D(src1),
10005 10003 OpcP, RegOpc(src2),
10006 10004 cmpF_P6_fixup );
10007 10005 ins_pipe( pipe_slow );
10008 10006 %}
10009 10007
10010 10008 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
10011 10009 predicate(VM_Version::supports_cmov() && UseSSE <=1);
10012 10010 match(Set cr (CmpD src1 src2));
10013 10011 ins_cost(150);
10014 10012 format %{ "FLD $src1\n\t"
10015 10013 "FUCOMIP ST,$src2 // P6 instruction" %}
10016 10014 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10017 10015 ins_encode( Push_Reg_D(src1),
10018 10016 OpcP, RegOpc(src2));
10019 10017 ins_pipe( pipe_slow );
10020 10018 %}
10021 10019
10022 10020 // Compare & branch
10023 10021 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
10024 10022 predicate(UseSSE<=1);
10025 10023 match(Set cr (CmpD src1 src2));
10026 10024 effect(KILL rax);
10027 10025 ins_cost(200);
10028 10026 format %{ "FLD $src1\n\t"
10029 10027 "FCOMp $src2\n\t"
10030 10028 "FNSTSW AX\n\t"
10031 10029 "TEST AX,0x400\n\t"
10032 10030 "JZ,s flags\n\t"
10033 10031 "MOV AH,1\t# unordered treat as LT\n"
10034 10032 "flags:\tSAHF" %}
10035 10033 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10036 10034 ins_encode( Push_Reg_D(src1),
10037 10035 OpcP, RegOpc(src2),
10038 10036 fpu_flags);
10039 10037 ins_pipe( pipe_slow );
10040 10038 %}
10041 10039
10042 10040 // Compare vs zero into -1,0,1
10043 10041 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
10044 10042 predicate(UseSSE<=1);
10045 10043 match(Set dst (CmpD3 src1 zero));
10046 10044 effect(KILL cr, KILL rax);
10047 10045 ins_cost(280);
10048 10046 format %{ "FTSTD $dst,$src1" %}
10049 10047 opcode(0xE4, 0xD9);
10050 10048 ins_encode( Push_Reg_D(src1),
10051 10049 OpcS, OpcP, PopFPU,
10052 10050 CmpF_Result(dst));
10053 10051 ins_pipe( pipe_slow );
10054 10052 %}
10055 10053
10056 10054 // Compare into -1,0,1
10057 10055 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
10058 10056 predicate(UseSSE<=1);
10059 10057 match(Set dst (CmpD3 src1 src2));
10060 10058 effect(KILL cr, KILL rax);
10061 10059 ins_cost(300);
10062 10060 format %{ "FCMPD $dst,$src1,$src2" %}
10063 10061 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10064 10062 ins_encode( Push_Reg_D(src1),
10065 10063 OpcP, RegOpc(src2),
10066 10064 CmpF_Result(dst));
10067 10065 ins_pipe( pipe_slow );
10068 10066 %}
10069 10067
10070 10068 // float compare and set condition codes in EFLAGS by XMM regs
10071 10069 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
10072 10070 predicate(UseSSE>=2);
10073 10071 match(Set cr (CmpD dst src));
10074 10072 effect(KILL rax);
10075 10073 ins_cost(125);
10076 10074 format %{ "COMISD $dst,$src\n"
10077 10075 "\tJNP exit\n"
10078 10076 "\tMOV ah,1 // saw a NaN, set CF\n"
10079 10077 "\tSAHF\n"
10080 10078 "exit:\tNOP // avoid branch to branch" %}
10081 10079 opcode(0x66, 0x0F, 0x2F);
10082 10080 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
10083 10081 ins_pipe( pipe_slow );
10084 10082 %}
10085 10083
10086 10084 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
10087 10085 predicate(UseSSE>=2);
10088 10086 match(Set cr (CmpD dst src));
10089 10087 ins_cost(100);
10090 10088 format %{ "COMISD $dst,$src" %}
10091 10089 opcode(0x66, 0x0F, 0x2F);
10092 10090 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
10093 10091 ins_pipe( pipe_slow );
10094 10092 %}
10095 10093
10096 10094 // float compare and set condition codes in EFLAGS by XMM regs
10097 10095 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
10098 10096 predicate(UseSSE>=2);
10099 10097 match(Set cr (CmpD dst (LoadD src)));
10100 10098 effect(KILL rax);
10101 10099 ins_cost(145);
10102 10100 format %{ "COMISD $dst,$src\n"
10103 10101 "\tJNP exit\n"
10104 10102 "\tMOV ah,1 // saw a NaN, set CF\n"
10105 10103 "\tSAHF\n"
10106 10104 "exit:\tNOP // avoid branch to branch" %}
10107 10105 opcode(0x66, 0x0F, 0x2F);
10108 10106 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
10109 10107 ins_pipe( pipe_slow );
10110 10108 %}
10111 10109
10112 10110 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
10113 10111 predicate(UseSSE>=2);
10114 10112 match(Set cr (CmpD dst (LoadD src)));
10115 10113 ins_cost(100);
10116 10114 format %{ "COMISD $dst,$src" %}
10117 10115 opcode(0x66, 0x0F, 0x2F);
10118 10116 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
10119 10117 ins_pipe( pipe_slow );
10120 10118 %}
10121 10119
10122 10120 // Compare into -1,0,1 in XMM
10123 10121 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
10124 10122 predicate(UseSSE>=2);
10125 10123 match(Set dst (CmpD3 src1 src2));
10126 10124 effect(KILL cr);
10127 10125 ins_cost(255);
10128 10126 format %{ "XOR $dst,$dst\n"
10129 10127 "\tCOMISD $src1,$src2\n"
10130 10128 "\tJP,s nan\n"
10131 10129 "\tJEQ,s exit\n"
10132 10130 "\tJA,s inc\n"
10133 10131 "nan:\tDEC $dst\n"
10134 10132 "\tJMP,s exit\n"
10135 10133 "inc:\tINC $dst\n"
10136 10134 "exit:"
10137 10135 %}
10138 10136 opcode(0x66, 0x0F, 0x2F);
10139 10137 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
10140 10138 CmpX_Result(dst));
10141 10139 ins_pipe( pipe_slow );
10142 10140 %}
10143 10141
10144 10142 // Compare into -1,0,1 in XMM and memory
10145 10143 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
10146 10144 predicate(UseSSE>=2);
10147 10145 match(Set dst (CmpD3 src1 (LoadD mem)));
10148 10146 effect(KILL cr);
10149 10147 ins_cost(275);
10150 10148 format %{ "COMISD $src1,$mem\n"
10151 10149 "\tMOV $dst,0\t\t# do not blow flags\n"
10152 10150 "\tJP,s nan\n"
10153 10151 "\tJEQ,s exit\n"
10154 10152 "\tJA,s inc\n"
10155 10153 "nan:\tDEC $dst\n"
10156 10154 "\tJMP,s exit\n"
10157 10155 "inc:\tINC $dst\n"
10158 10156 "exit:"
10159 10157 %}
10160 10158 opcode(0x66, 0x0F, 0x2F);
10161 10159 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
10162 10160 LdImmI(dst,0x0), CmpX_Result(dst));
10163 10161 ins_pipe( pipe_slow );
10164 10162 %}
10165 10163
10166 10164
10167 10165 instruct subD_reg(regD dst, regD src) %{
10168 10166 predicate (UseSSE <=1);
10169 10167 match(Set dst (SubD dst src));
10170 10168
10171 10169 format %{ "FLD $src\n\t"
10172 10170 "DSUBp $dst,ST" %}
10173 10171 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10174 10172 ins_cost(150);
10175 10173 ins_encode( Push_Reg_D(src),
10176 10174 OpcP, RegOpc(dst) );
10177 10175 ins_pipe( fpu_reg_reg );
10178 10176 %}
10179 10177
10180 10178 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10181 10179 predicate (UseSSE <=1);
10182 10180 match(Set dst (RoundDouble (SubD src1 src2)));
10183 10181 ins_cost(250);
10184 10182
10185 10183 format %{ "FLD $src2\n\t"
10186 10184 "DSUB ST,$src1\n\t"
10187 10185 "FSTP_D $dst\t# D-round" %}
10188 10186 opcode(0xD8, 0x5);
10189 10187 ins_encode( Push_Reg_D(src2),
10190 10188 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10191 10189 ins_pipe( fpu_mem_reg_reg );
10192 10190 %}
10193 10191
10194 10192
10195 10193 instruct subD_reg_mem(regD dst, memory src) %{
10196 10194 predicate (UseSSE <=1);
10197 10195 match(Set dst (SubD dst (LoadD src)));
10198 10196 ins_cost(150);
10199 10197
10200 10198 format %{ "FLD $src\n\t"
10201 10199 "DSUBp $dst,ST" %}
10202 10200 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10203 10201 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10204 10202 OpcP, RegOpc(dst) );
10205 10203 ins_pipe( fpu_reg_mem );
10206 10204 %}
10207 10205
10208 10206 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
10209 10207 predicate (UseSSE<=1);
10210 10208 match(Set dst (AbsD src));
10211 10209 ins_cost(100);
10212 10210 format %{ "FABS" %}
10213 10211 opcode(0xE1, 0xD9);
10214 10212 ins_encode( OpcS, OpcP );
10215 10213 ins_pipe( fpu_reg_reg );
10216 10214 %}
10217 10215
10218 10216 instruct absXD_reg( regXD dst ) %{
10219 10217 predicate(UseSSE>=2);
10220 10218 match(Set dst (AbsD dst));
10221 10219 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
10222 10220 ins_encode( AbsXD_encoding(dst));
10223 10221 ins_pipe( pipe_slow );
10224 10222 %}
10225 10223
10226 10224 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
10227 10225 predicate(UseSSE<=1);
10228 10226 match(Set dst (NegD src));
10229 10227 ins_cost(100);
10230 10228 format %{ "FCHS" %}
10231 10229 opcode(0xE0, 0xD9);
10232 10230 ins_encode( OpcS, OpcP );
10233 10231 ins_pipe( fpu_reg_reg );
10234 10232 %}
10235 10233
10236 10234 instruct negXD_reg( regXD dst ) %{
10237 10235 predicate(UseSSE>=2);
10238 10236 match(Set dst (NegD dst));
10239 10237 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
10240 10238 ins_encode %{
10241 10239 __ xorpd($dst$$XMMRegister,
10242 10240 ExternalAddress((address)double_signflip_pool));
10243 10241 %}
10244 10242 ins_pipe( pipe_slow );
10245 10243 %}
10246 10244
10247 10245 instruct addD_reg(regD dst, regD src) %{
10248 10246 predicate(UseSSE<=1);
10249 10247 match(Set dst (AddD dst src));
10250 10248 format %{ "FLD $src\n\t"
10251 10249 "DADD $dst,ST" %}
10252 10250 size(4);
10253 10251 ins_cost(150);
10254 10252 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10255 10253 ins_encode( Push_Reg_D(src),
10256 10254 OpcP, RegOpc(dst) );
10257 10255 ins_pipe( fpu_reg_reg );
10258 10256 %}
10259 10257
10260 10258
10261 10259 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10262 10260 predicate(UseSSE<=1);
10263 10261 match(Set dst (RoundDouble (AddD src1 src2)));
10264 10262 ins_cost(250);
10265 10263
10266 10264 format %{ "FLD $src2\n\t"
10267 10265 "DADD ST,$src1\n\t"
10268 10266 "FSTP_D $dst\t# D-round" %}
10269 10267 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
10270 10268 ins_encode( Push_Reg_D(src2),
10271 10269 OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10272 10270 ins_pipe( fpu_mem_reg_reg );
10273 10271 %}
10274 10272
10275 10273
10276 10274 instruct addD_reg_mem(regD dst, memory src) %{
10277 10275 predicate(UseSSE<=1);
10278 10276 match(Set dst (AddD dst (LoadD src)));
10279 10277 ins_cost(150);
10280 10278
10281 10279 format %{ "FLD $src\n\t"
10282 10280 "DADDp $dst,ST" %}
10283 10281 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10284 10282 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10285 10283 OpcP, RegOpc(dst) );
10286 10284 ins_pipe( fpu_reg_mem );
10287 10285 %}
10288 10286
10289 10287 // add-to-memory
10290 10288 instruct addD_mem_reg(memory dst, regD src) %{
10291 10289 predicate(UseSSE<=1);
10292 10290 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
10293 10291 ins_cost(150);
10294 10292
10295 10293 format %{ "FLD_D $dst\n\t"
↓ open down ↓ |
3021 lines elided |
↑ open up ↑ |
10296 10294 "DADD ST,$src\n\t"
10297 10295 "FST_D $dst" %}
10298 10296 opcode(0xDD, 0x0);
10299 10297 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
10300 10298 Opcode(0xD8), RegOpc(src),
10301 10299 set_instruction_start,
10302 10300 Opcode(0xDD), RMopc_Mem(0x03,dst) );
10303 10301 ins_pipe( fpu_reg_mem );
10304 10302 %}
10305 10303
10306 -instruct addD_reg_imm1(regD dst, immD1 src) %{
10304 +instruct addD_reg_imm1(regD dst, immD1 con) %{
10307 10305 predicate(UseSSE<=1);
10308 - match(Set dst (AddD dst src));
10306 + match(Set dst (AddD dst con));
10309 10307 ins_cost(125);
10310 10308 format %{ "FLD1\n\t"
10311 10309 "DADDp $dst,ST" %}
10312 - opcode(0xDE, 0x00);
10313 - ins_encode( LdImmD(src),
10314 - OpcP, RegOpc(dst) );
10315 - ins_pipe( fpu_reg );
10310 + ins_encode %{
10311 + __ fld1();
10312 + __ faddp($dst$$reg);
10313 + %}
10314 + ins_pipe(fpu_reg);
10316 10315 %}
10317 10316
10318 -instruct addD_reg_imm(regD dst, immD src) %{
10317 +instruct addD_reg_imm(regD dst, immD con) %{
10319 10318 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10320 - match(Set dst (AddD dst src));
10319 + match(Set dst (AddD dst con));
10321 10320 ins_cost(200);
10322 - format %{ "FLD_D [$src]\n\t"
10321 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10323 10322 "DADDp $dst,ST" %}
10324 - opcode(0xDE, 0x00); /* DE /0 */
10325 - ins_encode( LdImmD(src),
10326 - OpcP, RegOpc(dst));
10327 - ins_pipe( fpu_reg_mem );
10323 + ins_encode %{
10324 + __ fld_d($constantaddress($con));
10325 + __ faddp($dst$$reg);
10326 + %}
10327 + ins_pipe(fpu_reg_mem);
10328 10328 %}
10329 10329
10330 10330 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
10331 10331 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
10332 10332 match(Set dst (RoundDouble (AddD src con)));
10333 10333 ins_cost(200);
10334 - format %{ "FLD_D [$con]\n\t"
10334 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10335 10335 "DADD ST,$src\n\t"
10336 10336 "FSTP_D $dst\t# D-round" %}
10337 - opcode(0xD8, 0x00); /* D8 /0 */
10338 - ins_encode( LdImmD(con),
10339 - OpcP, RegOpc(src), Pop_Mem_D(dst));
10340 - ins_pipe( fpu_mem_reg_con );
10337 + ins_encode %{
10338 + __ fld_d($constantaddress($con));
10339 + __ fadd($src$$reg);
10340 + __ fstp_d(Address(rsp, $dst$$disp));
10341 + %}
10342 + ins_pipe(fpu_mem_reg_con);
10341 10343 %}
10342 10344
10343 10345 // Add two double precision floating point values in xmm
10344 10346 instruct addXD_reg(regXD dst, regXD src) %{
10345 10347 predicate(UseSSE>=2);
10346 10348 match(Set dst (AddD dst src));
10347 10349 format %{ "ADDSD $dst,$src" %}
10348 10350 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10349 10351 ins_pipe( pipe_slow );
10350 10352 %}
10351 10353
10352 10354 instruct addXD_imm(regXD dst, immXD con) %{
10353 10355 predicate(UseSSE>=2);
10354 10356 match(Set dst (AddD dst con));
10355 - format %{ "ADDSD $dst,[$con]" %}
10356 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
10357 - ins_pipe( pipe_slow );
10357 + format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10358 + ins_encode %{
10359 + __ addsd($dst$$XMMRegister, $constantaddress($con));
10360 + %}
10361 + ins_pipe(pipe_slow);
10358 10362 %}
10359 10363
10360 10364 instruct addXD_mem(regXD dst, memory mem) %{
10361 10365 predicate(UseSSE>=2);
10362 10366 match(Set dst (AddD dst (LoadD mem)));
10363 10367 format %{ "ADDSD $dst,$mem" %}
10364 10368 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10365 10369 ins_pipe( pipe_slow );
10366 10370 %}
10367 10371
10368 10372 // Sub two double precision floating point values in xmm
10369 10373 instruct subXD_reg(regXD dst, regXD src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
10370 10374 predicate(UseSSE>=2);
10371 10375 match(Set dst (SubD dst src));
10372 10376 format %{ "SUBSD $dst,$src" %}
10373 10377 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10374 10378 ins_pipe( pipe_slow );
10375 10379 %}
10376 10380
10377 10381 instruct subXD_imm(regXD dst, immXD con) %{
10378 10382 predicate(UseSSE>=2);
10379 10383 match(Set dst (SubD dst con));
10380 - format %{ "SUBSD $dst,[$con]" %}
10381 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
10382 - ins_pipe( pipe_slow );
10384 + format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10385 + ins_encode %{
10386 + __ subsd($dst$$XMMRegister, $constantaddress($con));
10387 + %}
10388 + ins_pipe(pipe_slow);
10383 10389 %}
10384 10390
10385 10391 instruct subXD_mem(regXD dst, memory mem) %{
10386 10392 predicate(UseSSE>=2);
10387 10393 match(Set dst (SubD dst (LoadD mem)));
10388 10394 format %{ "SUBSD $dst,$mem" %}
10389 10395 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10390 10396 ins_pipe( pipe_slow );
10391 10397 %}
10392 10398
10393 10399 // Mul two double precision floating point values in xmm
10394 10400 instruct mulXD_reg(regXD dst, regXD src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
10395 10401 predicate(UseSSE>=2);
10396 10402 match(Set dst (MulD dst src));
10397 10403 format %{ "MULSD $dst,$src" %}
10398 10404 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10399 10405 ins_pipe( pipe_slow );
10400 10406 %}
10401 10407
10402 10408 instruct mulXD_imm(regXD dst, immXD con) %{
10403 10409 predicate(UseSSE>=2);
10404 10410 match(Set dst (MulD dst con));
10405 - format %{ "MULSD $dst,[$con]" %}
10406 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
10407 - ins_pipe( pipe_slow );
10411 + format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10412 + ins_encode %{
10413 + __ mulsd($dst$$XMMRegister, $constantaddress($con));
10414 + %}
10415 + ins_pipe(pipe_slow);
10408 10416 %}
10409 10417
10410 10418 instruct mulXD_mem(regXD dst, memory mem) %{
10411 10419 predicate(UseSSE>=2);
10412 10420 match(Set dst (MulD dst (LoadD mem)));
10413 10421 format %{ "MULSD $dst,$mem" %}
10414 10422 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10415 10423 ins_pipe( pipe_slow );
10416 10424 %}
10417 10425
10418 10426 // Div two double precision floating point values in xmm
10419 10427 instruct divXD_reg(regXD dst, regXD src) %{
10420 10428 predicate(UseSSE>=2);
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
10421 10429 match(Set dst (DivD dst src));
10422 10430 format %{ "DIVSD $dst,$src" %}
10423 10431 opcode(0xF2, 0x0F, 0x5E);
10424 10432 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10425 10433 ins_pipe( pipe_slow );
10426 10434 %}
10427 10435
10428 10436 instruct divXD_imm(regXD dst, immXD con) %{
10429 10437 predicate(UseSSE>=2);
10430 10438 match(Set dst (DivD dst con));
10431 - format %{ "DIVSD $dst,[$con]" %}
10432 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
10433 - ins_pipe( pipe_slow );
10439 + format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10440 + ins_encode %{
10441 + __ divsd($dst$$XMMRegister, $constantaddress($con));
10442 + %}
10443 + ins_pipe(pipe_slow);
10434 10444 %}
10435 10445
10436 10446 instruct divXD_mem(regXD dst, memory mem) %{
10437 10447 predicate(UseSSE>=2);
10438 10448 match(Set dst (DivD dst (LoadD mem)));
10439 10449 format %{ "DIVSD $dst,$mem" %}
10440 10450 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10441 10451 ins_pipe( pipe_slow );
10442 10452 %}
10443 10453
10444 10454
10445 10455 instruct mulD_reg(regD dst, regD src) %{
10446 10456 predicate(UseSSE<=1);
10447 10457 match(Set dst (MulD dst src));
10448 10458 format %{ "FLD $src\n\t"
10449 10459 "DMULp $dst,ST" %}
10450 10460 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10451 10461 ins_cost(150);
10452 10462 ins_encode( Push_Reg_D(src),
10453 10463 OpcP, RegOpc(dst) );
10454 10464 ins_pipe( fpu_reg_reg );
10455 10465 %}
10456 10466
10457 10467 // Strict FP instruction biases argument before multiply then
10458 10468 // biases result to avoid double rounding of subnormals.
10459 10469 //
10460 10470 // scale arg1 by multiplying arg1 by 2^(-15360)
10461 10471 // load arg2
10462 10472 // multiply scaled arg1 by arg2
10463 10473 // rescale product by 2^(15360)
10464 10474 //
10465 10475 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
10466 10476 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10467 10477 match(Set dst (MulD dst src));
10468 10478 ins_cost(1); // Select this instruction for all strict FP double multiplies
10469 10479
10470 10480 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10471 10481 "DMULp $dst,ST\n\t"
10472 10482 "FLD $src\n\t"
10473 10483 "DMULp $dst,ST\n\t"
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
10474 10484 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10475 10485 "DMULp $dst,ST\n\t" %}
10476 10486 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10477 10487 ins_encode( strictfp_bias1(dst),
10478 10488 Push_Reg_D(src),
10479 10489 OpcP, RegOpc(dst),
10480 10490 strictfp_bias2(dst) );
10481 10491 ins_pipe( fpu_reg_reg );
10482 10492 %}
10483 10493
10484 -instruct mulD_reg_imm(regD dst, immD src) %{
10494 +instruct mulD_reg_imm(regD dst, immD con) %{
10485 10495 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10486 - match(Set dst (MulD dst src));
10496 + match(Set dst (MulD dst con));
10487 10497 ins_cost(200);
10488 - format %{ "FLD_D [$src]\n\t"
10498 + format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10489 10499 "DMULp $dst,ST" %}
10490 - opcode(0xDE, 0x1); /* DE /1 */
10491 - ins_encode( LdImmD(src),
10492 - OpcP, RegOpc(dst) );
10493 - ins_pipe( fpu_reg_mem );
10500 + ins_encode %{
10501 + __ fld_d($constantaddress($con));
10502 + __ fmulp($dst$$reg);
10503 + %}
10504 + ins_pipe(fpu_reg_mem);
10494 10505 %}
10495 10506
10496 10507
10497 10508 instruct mulD_reg_mem(regD dst, memory src) %{
10498 10509 predicate( UseSSE<=1 );
10499 10510 match(Set dst (MulD dst (LoadD src)));
10500 10511 ins_cost(200);
10501 10512 format %{ "FLD_D $src\n\t"
10502 10513 "DMULp $dst,ST" %}
10503 10514 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
10504 10515 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10505 10516 OpcP, RegOpc(dst) );
10506 10517 ins_pipe( fpu_reg_mem );
10507 10518 %}
10508 10519
10509 10520 //
10510 10521 // Cisc-alternate to reg-reg multiply
10511 10522 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
10512 10523 predicate( UseSSE<=1 );
10513 10524 match(Set dst (MulD src (LoadD mem)));
10514 10525 ins_cost(250);
10515 10526 format %{ "FLD_D $mem\n\t"
10516 10527 "DMUL ST,$src\n\t"
10517 10528 "FSTP_D $dst" %}
10518 10529 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10519 10530 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10520 10531 OpcReg_F(src),
10521 10532 Pop_Reg_D(dst) );
10522 10533 ins_pipe( fpu_reg_reg_mem );
10523 10534 %}
10524 10535
10525 10536
10526 10537 // MACRO3 -- addD a mulD
10527 10538 // This instruction is a '2-address' instruction in that the result goes
10528 10539 // back to src2. This eliminates a move from the macro; possibly the
10529 10540 // register allocator will have to add it back (and maybe not).
10530 10541 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
10531 10542 predicate( UseSSE<=1 );
10532 10543 match(Set src2 (AddD (MulD src0 src1) src2));
10533 10544 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10534 10545 "DMUL ST,$src1\n\t"
10535 10546 "DADDp $src2,ST" %}
10536 10547 ins_cost(250);
10537 10548 opcode(0xDD); /* LoadD DD /0 */
10538 10549 ins_encode( Push_Reg_F(src0),
10539 10550 FMul_ST_reg(src1),
10540 10551 FAddP_reg_ST(src2) );
10541 10552 ins_pipe( fpu_reg_reg_reg );
10542 10553 %}
10543 10554
10544 10555
10545 10556 // MACRO3 -- subD a mulD
10546 10557 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
10547 10558 predicate( UseSSE<=1 );
10548 10559 match(Set src2 (SubD (MulD src0 src1) src2));
10549 10560 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10550 10561 "DMUL ST,$src1\n\t"
10551 10562 "DSUBRp $src2,ST" %}
10552 10563 ins_cost(250);
10553 10564 ins_encode( Push_Reg_F(src0),
10554 10565 FMul_ST_reg(src1),
10555 10566 Opcode(0xDE), Opc_plus(0xE0,src2));
10556 10567 ins_pipe( fpu_reg_reg_reg );
10557 10568 %}
10558 10569
10559 10570
10560 10571 instruct divD_reg(regD dst, regD src) %{
10561 10572 predicate( UseSSE<=1 );
10562 10573 match(Set dst (DivD dst src));
10563 10574
10564 10575 format %{ "FLD $src\n\t"
10565 10576 "FDIVp $dst,ST" %}
10566 10577 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10567 10578 ins_cost(150);
10568 10579 ins_encode( Push_Reg_D(src),
10569 10580 OpcP, RegOpc(dst) );
10570 10581 ins_pipe( fpu_reg_reg );
10571 10582 %}
10572 10583
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-mode x87 double divide: dst must be FPR1, src must NOT be FPR1.
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate: the strict-mode test already includes UseSSE<=1.
  // (The original carried a second, redundant predicate(UseSSE<=1) line.)
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  // NOTE(review): cost '01' (== 1) makes this form essentially free so it
  // wins selection over divD_reg in strict methods — confirm the literal
  // was not intended to be a larger cost.
  ins_cost(01);

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10600 10611
// Divide then round the result to a stack slot (non-strict methods only):
// dst = round(src1 / src2), spilling forces double rounding precision.
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10613 10624
10614 10625
// x87 double remainder: dst = dst MOD src via FPREM loop in emitModD().
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
             emitModD(),
             Push_Result_Mod_D(src),
             Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}
10628 10639
// SSE2 double remainder: round-trips src0/src1 through the x87 stack
// (no SSE remainder instruction exists) using the FPREM partial-remainder
// loop, then stores the result back to an XMM register.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10653 10664
// x87 sine: operand already in FPR1 (ST0); emits raw FSIN (D9 FE).
instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}
10663 10674
// SSE2 sine: bounce the XMM value through the x87 stack for FSIN.
instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10674 10685
// x87 cosine: operand already in FPR1 (ST0); emits raw FCOS (D9 FF).
instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}
10684 10695
// SSE2 cosine: bounce the XMM value through the x87 stack for FCOS.
instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10695 10706
// x87 tangent: FPTAN pushes tan(x) then 1.0; the extra FSTP drops the 1.0.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8)); // fstp st
  ins_pipe( pipe_slow );
%}
10704 10715
// SSE2 tangent: bounce through the x87 stack for FPTAN; the FSTP drops the
// 1.0 that FPTAN pushes on top of the result.
instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8), // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10716 10727
// x87 two-argument arctangent: dst = atan2-style AtanD(dst, src) via FPATAN.
instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10726 10737
// SSE2 arctangent: bounce through the x87 stack for FPATAN.
instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10737 10748
// x87 double square root via FSQRT (D9 FA; opcode bytes emitted OpcS,OpcP).
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}
10747 10758
// x87 power: computes X^Y as 2^(Y*log2(X)). FYL2X forms Q = Y*log2(X);
// the shared pow_exp_core_encoding splits Q into integer and fractional
// parts, computes 2^frac(Q) with F2XM1, and scales by 2^int(Q) by building
// a scaling double in [ESP] using EAX/EBX/ECX (hence the KILLs).
// Result is left in FPR1 (the Y register).
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y)); // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
            "FLD_D $X\n\t"
            "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              Push_Reg_D(X),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10783 10794
// SSE2 power: same 2^(src1*log2(src0)) algorithm as powD_reg, but the XMM
// operands are pushed onto the x87 stack first and the x87 result is moved
// back to $dst. FPR1 plus EAX/EBX/ECX are clobbered by the exponent core.
instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
            "MOVSD [ESP],$src1\n\t"
            "FLD FPR1,$src1\n\t"
            "MOVSD [ESP],$src0\n\t"
            "FLD FPR1,$src0\n\t"
            "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10825 10836
10826 10837
// x87 exponential: computes e^x as 2^(x*log2(e)). FLDL2E/FMULP form
// Q = x*log2(e); the shared pow_exp_core_encoding then computes 2^Q
// (see powD_reg for the int/frac split and scaling through EAX/EBX/ECX).
// Operand and result live in FPR1.
instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rbx, KILL rcx);
  // Fixed: the first format line was missing its "\n\t" separator, which
  // made the disassembly print "SUB ESP,8...FLDL2E" on one line
  // (compare expXD_reg).
  format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP \t\t\t# Q=X*log2(e)\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "ADD ESP,8"
  %}
  ins_encode( push_stack_temp_qword,
              Opcode(0xD9), Opcode(0xEA), // fldl2e
              Opcode(0xDE), Opcode(0xC9), // fmulp
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10862 10873
// SSE2 exponential: same 2^(x*log2(e)) algorithm as expD_reg, with the XMM
// source pushed onto the x87 stack first and the result moved back to $dst.
// FPR1 plus EAX/EBX/ECX are clobbered by the exponent core.
instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP \t\t\t# Q=X*log2(e) X\n\t"

            "FDUP \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP dword [ESP]\n\t"
            "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD EAX,1023\t\t# Double exponent bias\n\t"
            "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL EAX,20\t\t# Shift exponent into place\n\t"
            "TEST EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV [ESP+0],0\n\t"
            "FMUL ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8"
  %}
  ins_encode( Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xEA), // fldl2e
              Opcode(0xDE), Opcode(0xC9), // fmulp
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10901 10912
10902 10913
10903 10914
// x87 base-10 logarithm: log10(x) = log10(2) * log2(x) via FLDLG2/FYL2X.
instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}
10921 10932
// SSE2 base-10 logarithm: push log10(2), then the XMM source, then FYL2X;
// result moved back to $dst. No FXCH needed since operand order is chosen
// by the push sequence.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}
10938 10949
// x87 natural logarithm: ln(x) = ln(2) * log2(x) via FLDLN2/FYL2X.
instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}
10956 10967
// SSE2 natural logarithm: push ln(2), then the XMM source, then FYL2X;
// result moved back to $dst.
instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10973 10984
10974 10985 //-------------Float Instructions-------------------------------
10975 10986 // Float Math
10976 10987
10977 10988 // Code for float compare:
10978 10989 // fcompp();
10979 10990 // fwait(); fnstsw_ax();
10980 10991 // sahf();
10981 10992 // movl(dst, unordered_result);
10982 10993 // jcc(Assembler::parity, exit);
10983 10994 // movl(dst, less_result);
10984 10995 // jcc(Assembler::below, exit);
10985 10996 // movl(dst, equal_result);
10986 10997 // jcc(Assembler::equal, exit);
10987 10998 // movl(dst, greater_result);
10988 10999 // exit:
10989 11000
// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP; the fixup branch turns an unordered (NaN) result into
// "less than" by setting CF through AH/SAHF (clobbers EAX).
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
11008 11019
// P6 float compare when only CF-based conditions are consumed
// (eFlagsRegUCF): no NaN fixup needed, so cheaper than cmpF_cc_P6.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
11020 11031
11021 11032
// Compare & branch
// Pre-P6 float compare: FCOMp + FNSTSW/SAHF to move x87 status into
// EFLAGS, treating unordered (NaN) as "less than" (clobbers EAX).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
11041 11052
// Compare vs zero into -1,0,1
// x87 FTST against 0.0, with CmpF_Result materializing the three-way
// integer result in $dst (clobbers EAX and EFLAGS).
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11055 11066
// Compare into -1,0,1
// x87 three-way float compare; CmpF_Result turns the x87 status word
// into -1/0/1 in $dst (clobbers EAX and EFLAGS).
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11069 11080
// float compare and set condition codes in EFLAGS by XMM regs
// SSE COMISS with the shared NaN fixup (sets CF on unordered; kills EAX).
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}
11085 11096
// SSE COMISS when only CF-based conditions are consumed: no NaN fixup.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11095 11106
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: COMISS reg,mem plus NaN fixup.
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}
11111 11122
// Memory-operand COMISS for CF-only consumers: no NaN fixup needed.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11121 11132
// Compare into -1,0,1 in XMM
// Three-way SSE compare: COMISS then branch tree mapping unordered/less
// to -1, equal to 0, greater to 1 in $dst.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISS $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11142 11153
// Compare into -1,0,1 in XMM and memory
// Memory-operand form of cmpX_reg; $dst is zeroed with MOV (not XOR)
// after the compare so the flags from COMISS are preserved.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11163 11174
// Spill to obtain 24-bit precision
// x87 float subtract that rounds to 24 bits by spilling to a stack slot.
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// In-place x87 float subtract: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11188 11199
// Spill to obtain 24-bit precision
// x87 float add that rounds to 24 bits by spilling to a stack slot.
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// In-place x87 float add: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11214 11225
// Add two single precision floating point values in xmm
// ADDSS (F3 0F 58): dst = dst + src.
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11223 11234
// ADDSS with a float constant: the immediate is materialized in the
// nmethod's constant table and addressed via $constantaddress (6961690).
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11231 11244
// ADDSS with a memory operand folded from a LoadF.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11239 11252
// Subtract two single precision floating point values in xmm
// SUBSS (F3 0F 5C): dst = dst - src.
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11248 11261
// SUBSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11256 11271
// SUBSS with a memory operand folded from a LoadF.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11264 11279
// Multiply two single precision floating point values in xmm
// MULSS (F3 0F 59): dst = dst * src.
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11273 11288
// MULSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11281 11298
// MULSS with a memory operand folded from a LoadF.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11289 11306
// Divide two single precision floating point values in xmm
// DIVSS (F3 0F 5E): dst = dst / src.
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11298 11315
// DIVSS with a float constant loaded from the nmethod constant table
// via $constantaddress (6961690).
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
11306 11325
// DIVSS with a memory operand folded from a LoadF.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11314 11333
// Get the square root of a single precision floating point values in xmm
// Matches the D2F(SqrtD(F2D x)) pattern so a single SQRTSS replaces the
// widen/sqrt/narrow sequence.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11323 11342
// SQRTSS with the float source folded from memory.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11331 11350
// Get the square root of a double precision floating point values in xmm
// SQRTSD (F2 0F 51).
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11340 11359
// SQRTSD with the double source folded from memory.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11348 11367
// x87 float absolute value: operand in FPR1 (ST0); raw FABS (D9 E1).
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
11358 11377
// SSE float absolute value: ANDPS with a sign-bit-clearing mask.
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11366 11385
11367 11386 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11368 11387 predicate(UseSSE==0);
11369 11388 match(Set dst (NegF src));
11370 11389 ins_cost(100);
11371 11390 format %{ "FCHS" %}
11372 11391 opcode(0xE0, 0xD9);
11373 11392 ins_encode( OpcS, OpcP );
11374 11393 ins_pipe( fpu_reg_reg );
11375 11394 %}
11376 11395
11377 11396 instruct negX_reg( regX dst ) %{
11378 11397 predicate(UseSSE>=1);
11379 11398 match(Set dst (NegF dst));
11380 11399 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11381 11400 ins_encode( NegXF_encoding(dst));
11382 11401 ins_pipe( pipe_slow );
11383 11402 %}
11384 11403
11385 11404 // Cisc-alternate to addF_reg
11386 11405 // Spill to obtain 24-bit precision
11387 11406 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11388 11407 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11389 11408 match(Set dst (AddF src1 (LoadF src2)));
11390 11409
11391 11410 format %{ "FLD $src2\n\t"
11392 11411 "FADD ST,$src1\n\t"
11393 11412 "FSTP_S $dst" %}
11394 11413 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11395 11414 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11396 11415 OpcReg_F(src1),
11397 11416 Pop_Mem_F(dst) );
11398 11417 ins_pipe( fpu_mem_reg_mem );
11399 11418 %}
11400 11419 //
11401 11420 // Cisc-alternate to addF_reg
11402 11421 // This instruction does not round to 24-bits
11403 11422 instruct addF_reg_mem(regF dst, memory src) %{
11404 11423 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11405 11424 match(Set dst (AddF dst (LoadF src)));
11406 11425
11407 11426 format %{ "FADD $dst,$src" %}
11408 11427 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11409 11428 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11410 11429 OpcP, RegOpc(dst) );
11411 11430 ins_pipe( fpu_reg_mem );
11412 11431 %}
11413 11432
11414 11433 // // Following two instructions for _222_mpegaudio
11415 11434 // Spill to obtain 24-bit precision
11416 11435 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11417 11436 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11418 11437 match(Set dst (AddF src1 src2));
11419 11438
11420 11439 format %{ "FADD $dst,$src1,$src2" %}
11421 11440 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11422 11441 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11423 11442 OpcReg_F(src2),
11424 11443 Pop_Mem_F(dst) );
11425 11444 ins_pipe( fpu_mem_reg_mem );
11426 11445 %}
11427 11446
11428 11447 // Cisc-spill variant
11429 11448 // Spill to obtain 24-bit precision
11430 11449 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11431 11450 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11432 11451 match(Set dst (AddF src1 (LoadF src2)));
11433 11452
11434 11453 format %{ "FADD $dst,$src1,$src2 cisc" %}
11435 11454 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11436 11455 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11437 11456 set_instruction_start,
11438 11457 OpcP, RMopc_Mem(secondary,src1),
11439 11458 Pop_Mem_F(dst) );
11440 11459 ins_pipe( fpu_mem_mem_mem );
11441 11460 %}
11442 11461
11443 11462 // Spill to obtain 24-bit precision
11444 11463 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11445 11464 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11446 11465 match(Set dst (AddF src1 src2));
11447 11466
11448 11467 format %{ "FADD $dst,$src1,$src2" %}
↓ open down ↓ |
134 lines elided |
↑ open up ↑ |
11449 11468 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11450 11469 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11451 11470 set_instruction_start,
11452 11471 OpcP, RMopc_Mem(secondary,src1),
11453 11472 Pop_Mem_F(dst) );
11454 11473 ins_pipe( fpu_mem_mem_mem );
11455 11474 %}
11456 11475
11457 11476
11458 11477 // Spill to obtain 24-bit precision
11459 -instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11478 +instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11460 11479 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11461 - match(Set dst (AddF src1 src2));
11462 - format %{ "FLD $src1\n\t"
11463 - "FADD $src2\n\t"
11480 + match(Set dst (AddF src con));
11481 + format %{ "FLD $src\n\t"
11482 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11464 11483 "FSTP_S $dst" %}
11465 - opcode(0xD8, 0x00); /* D8 /0 */
11466 - ins_encode( Push_Reg_F(src1),
11467 - Opc_MemImm_F(src2),
11468 - Pop_Mem_F(dst));
11469 - ins_pipe( fpu_mem_reg_con );
11484 + ins_encode %{
11485 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11486 + __ fadd_s($constantaddress($con));
11487 + __ fstp_s(Address(rsp, $dst$$disp));
11488 + %}
11489 + ins_pipe(fpu_mem_reg_con);
11470 11490 %}
11471 11491 //
11472 11492 // This instruction does not round to 24-bits
11473 -instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11493 +instruct addF_reg_imm(regF dst, regF src, immF con) %{
11474 11494 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11475 - match(Set dst (AddF src1 src2));
11476 - format %{ "FLD $src1\n\t"
11477 - "FADD $src2\n\t"
11478 - "FSTP_S $dst" %}
11479 - opcode(0xD8, 0x00); /* D8 /0 */
11480 - ins_encode( Push_Reg_F(src1),
11481 - Opc_MemImm_F(src2),
11482 - Pop_Reg_F(dst));
11483 - ins_pipe( fpu_reg_reg_con );
11495 + match(Set dst (AddF src con));
11496 + format %{ "FLD $src\n\t"
11497 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11498 + "FSTP $dst" %}
11499 + ins_encode %{
11500 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11501 + __ fadd_s($constantaddress($con));
11502 + __ fstp_d($dst$$reg);
11503 + %}
11504 + ins_pipe(fpu_reg_reg_con);
11484 11505 %}
11485 11506
11486 11507 // Spill to obtain 24-bit precision
11487 11508 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11488 11509 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11489 11510 match(Set dst (MulF src1 src2));
11490 11511
11491 11512 format %{ "FLD $src1\n\t"
11492 11513 "FMUL $src2\n\t"
11493 11514 "FSTP_S $dst" %}
11494 11515 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11495 11516 ins_encode( Push_Reg_F(src1),
11496 11517 OpcReg_F(src2),
11497 11518 Pop_Mem_F(dst) );
11498 11519 ins_pipe( fpu_mem_reg_reg );
11499 11520 %}
11500 11521 //
11501 11522 // This instruction does not round to 24-bits
11502 11523 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11503 11524 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11504 11525 match(Set dst (MulF src1 src2));
11505 11526
11506 11527 format %{ "FLD $src1\n\t"
11507 11528 "FMUL $src2\n\t"
11508 11529 "FSTP_S $dst" %}
11509 11530 opcode(0xD8, 0x1); /* D8 C8+i */
11510 11531 ins_encode( Push_Reg_F(src2),
11511 11532 OpcReg_F(src1),
11512 11533 Pop_Reg_F(dst) );
11513 11534 ins_pipe( fpu_reg_reg_reg );
11514 11535 %}
11515 11536
11516 11537
11517 11538 // Spill to obtain 24-bit precision
11518 11539 // Cisc-alternate to reg-reg multiply
11519 11540 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11520 11541 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11521 11542 match(Set dst (MulF src1 (LoadF src2)));
11522 11543
11523 11544 format %{ "FLD_S $src2\n\t"
11524 11545 "FMUL $src1\n\t"
11525 11546 "FSTP_S $dst" %}
11526 11547 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11527 11548 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11528 11549 OpcReg_F(src1),
11529 11550 Pop_Mem_F(dst) );
11530 11551 ins_pipe( fpu_mem_reg_mem );
11531 11552 %}
11532 11553 //
11533 11554 // This instruction does not round to 24-bits
11534 11555 // Cisc-alternate to reg-reg multiply
11535 11556 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11536 11557 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11537 11558 match(Set dst (MulF src1 (LoadF src2)));
11538 11559
11539 11560 format %{ "FMUL $dst,$src1,$src2" %}
11540 11561 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11541 11562 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11542 11563 OpcReg_F(src1),
11543 11564 Pop_Reg_F(dst) );
11544 11565 ins_pipe( fpu_reg_reg_mem );
11545 11566 %}
11546 11567
11547 11568 // Spill to obtain 24-bit precision
11548 11569 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11549 11570 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11550 11571 match(Set dst (MulF src1 src2));
11551 11572
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
11552 11573 format %{ "FMUL $dst,$src1,$src2" %}
11553 11574 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11554 11575 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11555 11576 set_instruction_start,
11556 11577 OpcP, RMopc_Mem(secondary,src1),
11557 11578 Pop_Mem_F(dst) );
11558 11579 ins_pipe( fpu_mem_mem_mem );
11559 11580 %}
11560 11581
11561 11582 // Spill to obtain 24-bit precision
11562 -instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11583 +instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11563 11584 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11564 - match(Set dst (MulF src1 src2));
11585 + match(Set dst (MulF src con));
11565 11586
11566 - format %{ "FMULc $dst,$src1,$src2" %}
11567 - opcode(0xD8, 0x1); /* D8 /1*/
11568 - ins_encode( Push_Reg_F(src1),
11569 - Opc_MemImm_F(src2),
11570 - Pop_Mem_F(dst));
11571 - ins_pipe( fpu_mem_reg_con );
11587 + format %{ "FLD $src\n\t"
11588 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11589 + "FSTP_S $dst" %}
11590 + ins_encode %{
11591 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11592 + __ fmul_s($constantaddress($con));
11593 + __ fstp_s(Address(rsp, $dst$$disp));
11594 + %}
11595 + ins_pipe(fpu_mem_reg_con);
11572 11596 %}
11573 11597 //
11574 11598 // This instruction does not round to 24-bits
11575 -instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11599 +instruct mulF_reg_imm(regF dst, regF src, immF con) %{
11576 11600 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11577 - match(Set dst (MulF src1 src2));
11601 + match(Set dst (MulF src con));
11578 11602
11579 - format %{ "FMULc $dst. $src1, $src2" %}
11580 - opcode(0xD8, 0x1); /* D8 /1*/
11581 - ins_encode( Push_Reg_F(src1),
11582 - Opc_MemImm_F(src2),
11583 - Pop_Reg_F(dst));
11584 - ins_pipe( fpu_reg_reg_con );
11603 + format %{ "FLD $src\n\t"
11604 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11605 + "FSTP $dst" %}
11606 + ins_encode %{
11607 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11608 + __ fmul_s($constantaddress($con));
11609 + __ fstp_d($dst$$reg);
11610 + %}
11611 + ins_pipe(fpu_reg_reg_con);
11585 11612 %}
11586 11613
11587 11614
11588 11615 //
11589 11616 // MACRO1 -- subsume unshared load into mulF
11590 11617 // This instruction does not round to 24-bits
11591 11618 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11592 11619 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11593 11620 match(Set dst (MulF (LoadF mem1) src));
11594 11621
11595 11622 format %{ "FLD $mem1 ===MACRO1===\n\t"
11596 11623 "FMUL ST,$src\n\t"
11597 11624 "FSTP $dst" %}
11598 11625 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11599 11626 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11600 11627 OpcReg_F(src),
11601 11628 Pop_Reg_F(dst) );
11602 11629 ins_pipe( fpu_reg_reg_mem );
11603 11630 %}
11604 11631 //
11605 11632 // MACRO2 -- addF a mulF which subsumed an unshared load
11606 11633 // This instruction does not round to 24-bits
11607 11634 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11608 11635 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11609 11636 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11610 11637 ins_cost(95);
11611 11638
11612 11639 format %{ "FLD $mem1 ===MACRO2===\n\t"
11613 11640 "FMUL ST,$src1 subsume mulF left load\n\t"
11614 11641 "FADD ST,$src2\n\t"
11615 11642 "FSTP $dst" %}
11616 11643 opcode(0xD9); /* LoadF D9 /0 */
11617 11644 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11618 11645 FMul_ST_reg(src1),
11619 11646 FAdd_ST_reg(src2),
11620 11647 Pop_Reg_F(dst) );
11621 11648 ins_pipe( fpu_reg_mem_reg_reg );
11622 11649 %}
11623 11650
11624 11651 // MACRO3 -- addF a mulF
11625 11652 // This instruction does not round to 24-bits. It is a '2-address'
11626 11653 // instruction in that the result goes back to src2. This eliminates
11627 11654 // a move from the macro; possibly the register allocator will have
11628 11655 // to add it back (and maybe not).
11629 11656 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11630 11657 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11631 11658 match(Set src2 (AddF (MulF src0 src1) src2));
11632 11659
11633 11660 format %{ "FLD $src0 ===MACRO3===\n\t"
11634 11661 "FMUL ST,$src1\n\t"
11635 11662 "FADDP $src2,ST" %}
11636 11663 opcode(0xD9); /* LoadF D9 /0 */
11637 11664 ins_encode( Push_Reg_F(src0),
11638 11665 FMul_ST_reg(src1),
11639 11666 FAddP_reg_ST(src2) );
11640 11667 ins_pipe( fpu_reg_reg_reg );
11641 11668 %}
11642 11669
11643 11670 // MACRO4 -- divF subF
11644 11671 // This instruction does not round to 24-bits
11645 11672 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11646 11673 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11647 11674 match(Set dst (DivF (SubF src2 src1) src3));
11648 11675
11649 11676 format %{ "FLD $src2 ===MACRO4===\n\t"
11650 11677 "FSUB ST,$src1\n\t"
11651 11678 "FDIV ST,$src3\n\t"
11652 11679 "FSTP $dst" %}
11653 11680 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11654 11681 ins_encode( Push_Reg_F(src2),
11655 11682 subF_divF_encode(src1,src3),
11656 11683 Pop_Reg_F(dst) );
11657 11684 ins_pipe( fpu_reg_reg_reg_reg );
11658 11685 %}
11659 11686
11660 11687 // Spill to obtain 24-bit precision
11661 11688 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11662 11689 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11663 11690 match(Set dst (DivF src1 src2));
11664 11691
11665 11692 format %{ "FDIV $dst,$src1,$src2" %}
11666 11693 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11667 11694 ins_encode( Push_Reg_F(src1),
11668 11695 OpcReg_F(src2),
11669 11696 Pop_Mem_F(dst) );
11670 11697 ins_pipe( fpu_mem_reg_reg );
11671 11698 %}
11672 11699 //
11673 11700 // This instruction does not round to 24-bits
11674 11701 instruct divF_reg(regF dst, regF src) %{
11675 11702 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11676 11703 match(Set dst (DivF dst src));
11677 11704
11678 11705 format %{ "FDIV $dst,$src" %}
11679 11706 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11680 11707 ins_encode( Push_Reg_F(src),
11681 11708 OpcP, RegOpc(dst) );
11682 11709 ins_pipe( fpu_reg_reg );
11683 11710 %}
11684 11711
11685 11712
11686 11713 // Spill to obtain 24-bit precision
11687 11714 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11688 11715 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11689 11716 match(Set dst (ModF src1 src2));
11690 11717 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11691 11718
11692 11719 format %{ "FMOD $dst,$src1,$src2" %}
11693 11720 ins_encode( Push_Reg_Mod_D(src1, src2),
11694 11721 emitModD(),
11695 11722 Push_Result_Mod_D(src2),
11696 11723 Pop_Mem_F(dst));
11697 11724 ins_pipe( pipe_slow );
11698 11725 %}
11699 11726 //
11700 11727 // This instruction does not round to 24-bits
11701 11728 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11702 11729 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11703 11730 match(Set dst (ModF dst src));
11704 11731 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11705 11732
11706 11733 format %{ "FMOD $dst,$src" %}
11707 11734 ins_encode(Push_Reg_Mod_D(dst, src),
11708 11735 emitModD(),
11709 11736 Push_Result_Mod_D(src),
11710 11737 Pop_Reg_F(dst));
11711 11738 ins_pipe( pipe_slow );
11712 11739 %}
11713 11740
11714 11741 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11715 11742 predicate(UseSSE>=1);
11716 11743 match(Set dst (ModF src0 src1));
11717 11744 effect(KILL rax, KILL cr);
11718 11745 format %{ "SUB ESP,4\t # FMOD\n"
11719 11746 "\tMOVSS [ESP+0],$src1\n"
11720 11747 "\tFLD_S [ESP+0]\n"
11721 11748 "\tMOVSS [ESP+0],$src0\n"
11722 11749 "\tFLD_S [ESP+0]\n"
11723 11750 "loop:\tFPREM\n"
11724 11751 "\tFWAIT\n"
11725 11752 "\tFNSTSW AX\n"
11726 11753 "\tSAHF\n"
11727 11754 "\tJP loop\n"
11728 11755 "\tFSTP_S [ESP+0]\n"
11729 11756 "\tMOVSS $dst,[ESP+0]\n"
11730 11757 "\tADD ESP,4\n"
11731 11758 "\tFSTP ST0\t # Restore FPU Stack"
11732 11759 %}
11733 11760 ins_cost(250);
11734 11761 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11735 11762 ins_pipe( pipe_slow );
11736 11763 %}
11737 11764
11738 11765
11739 11766 //----------Arithmetic Conversion Instructions---------------------------------
11740 11767 // The conversions operations are all Alpha sorted. Please keep it that way!
11741 11768
11742 11769 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11743 11770 predicate(UseSSE==0);
11744 11771 match(Set dst (RoundFloat src));
11745 11772 ins_cost(125);
11746 11773 format %{ "FST_S $dst,$src\t# F-round" %}
11747 11774 ins_encode( Pop_Mem_Reg_F(dst, src) );
11748 11775 ins_pipe( fpu_mem_reg );
11749 11776 %}
11750 11777
11751 11778 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11752 11779 predicate(UseSSE<=1);
11753 11780 match(Set dst (RoundDouble src));
11754 11781 ins_cost(125);
11755 11782 format %{ "FST_D $dst,$src\t# D-round" %}
11756 11783 ins_encode( Pop_Mem_Reg_D(dst, src) );
11757 11784 ins_pipe( fpu_mem_reg );
11758 11785 %}
11759 11786
11760 11787 // Force rounding to 24-bit precision and 6-bit exponent
11761 11788 instruct convD2F_reg(stackSlotF dst, regD src) %{
11762 11789 predicate(UseSSE==0);
11763 11790 match(Set dst (ConvD2F src));
11764 11791 format %{ "FST_S $dst,$src\t# F-round" %}
11765 11792 expand %{
11766 11793 roundFloat_mem_reg(dst,src);
11767 11794 %}
11768 11795 %}
11769 11796
11770 11797 // Force rounding to 24-bit precision and 6-bit exponent
11771 11798 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11772 11799 predicate(UseSSE==1);
11773 11800 match(Set dst (ConvD2F src));
11774 11801 effect( KILL cr );
11775 11802 format %{ "SUB ESP,4\n\t"
11776 11803 "FST_S [ESP],$src\t# F-round\n\t"
11777 11804 "MOVSS $dst,[ESP]\n\t"
11778 11805 "ADD ESP,4" %}
11779 11806 ins_encode( D2X_encoding(dst, src) );
11780 11807 ins_pipe( pipe_slow );
11781 11808 %}
11782 11809
11783 11810 // Force rounding double precision to single precision
11784 11811 instruct convXD2X_reg(regX dst, regXD src) %{
11785 11812 predicate(UseSSE>=2);
11786 11813 match(Set dst (ConvD2F src));
11787 11814 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11788 11815 opcode(0xF2, 0x0F, 0x5A);
11789 11816 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11790 11817 ins_pipe( pipe_slow );
11791 11818 %}
11792 11819
11793 11820 instruct convF2D_reg_reg(regD dst, regF src) %{
11794 11821 predicate(UseSSE==0);
11795 11822 match(Set dst (ConvF2D src));
11796 11823 format %{ "FST_S $dst,$src\t# D-round" %}
11797 11824 ins_encode( Pop_Reg_Reg_D(dst, src));
11798 11825 ins_pipe( fpu_reg_reg );
11799 11826 %}
11800 11827
11801 11828 instruct convF2D_reg(stackSlotD dst, regF src) %{
11802 11829 predicate(UseSSE==1);
11803 11830 match(Set dst (ConvF2D src));
11804 11831 format %{ "FST_D $dst,$src\t# D-round" %}
11805 11832 expand %{
11806 11833 roundDouble_mem_reg(dst,src);
11807 11834 %}
11808 11835 %}
11809 11836
11810 11837 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11811 11838 predicate(UseSSE==1);
11812 11839 match(Set dst (ConvF2D src));
11813 11840 effect( KILL cr );
11814 11841 format %{ "SUB ESP,4\n\t"
11815 11842 "MOVSS [ESP] $src\n\t"
11816 11843 "FLD_S [ESP]\n\t"
11817 11844 "ADD ESP,4\n\t"
11818 11845 "FSTP $dst\t# D-round" %}
11819 11846 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11820 11847 ins_pipe( pipe_slow );
11821 11848 %}
11822 11849
11823 11850 instruct convX2XD_reg(regXD dst, regX src) %{
11824 11851 predicate(UseSSE>=2);
11825 11852 match(Set dst (ConvF2D src));
11826 11853 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11827 11854 opcode(0xF3, 0x0F, 0x5A);
11828 11855 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11829 11856 ins_pipe( pipe_slow );
11830 11857 %}
11831 11858
11832 11859 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11833 11860 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11834 11861 predicate(UseSSE<=1);
11835 11862 match(Set dst (ConvD2I src));
11836 11863 effect( KILL tmp, KILL cr );
11837 11864 format %{ "FLD $src\t# Convert double to int \n\t"
11838 11865 "FLDCW trunc mode\n\t"
11839 11866 "SUB ESP,4\n\t"
11840 11867 "FISTp [ESP + #0]\n\t"
11841 11868 "FLDCW std/24-bit mode\n\t"
11842 11869 "POP EAX\n\t"
11843 11870 "CMP EAX,0x80000000\n\t"
11844 11871 "JNE,s fast\n\t"
11845 11872 "FLD_D $src\n\t"
11846 11873 "CALL d2i_wrapper\n"
11847 11874 "fast:" %}
11848 11875 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11849 11876 ins_pipe( pipe_slow );
11850 11877 %}
11851 11878
11852 11879 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11853 11880 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11854 11881 predicate(UseSSE>=2);
11855 11882 match(Set dst (ConvD2I src));
11856 11883 effect( KILL tmp, KILL cr );
11857 11884 format %{ "CVTTSD2SI $dst, $src\n\t"
11858 11885 "CMP $dst,0x80000000\n\t"
11859 11886 "JNE,s fast\n\t"
11860 11887 "SUB ESP, 8\n\t"
11861 11888 "MOVSD [ESP], $src\n\t"
11862 11889 "FLD_D [ESP]\n\t"
11863 11890 "ADD ESP, 8\n\t"
11864 11891 "CALL d2i_wrapper\n"
11865 11892 "fast:" %}
11866 11893 opcode(0x1); // double-precision conversion
11867 11894 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11868 11895 ins_pipe( pipe_slow );
11869 11896 %}
11870 11897
11871 11898 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11872 11899 predicate(UseSSE<=1);
11873 11900 match(Set dst (ConvD2L src));
11874 11901 effect( KILL cr );
11875 11902 format %{ "FLD $src\t# Convert double to long\n\t"
11876 11903 "FLDCW trunc mode\n\t"
11877 11904 "SUB ESP,8\n\t"
11878 11905 "FISTp [ESP + #0]\n\t"
11879 11906 "FLDCW std/24-bit mode\n\t"
11880 11907 "POP EAX\n\t"
11881 11908 "POP EDX\n\t"
11882 11909 "CMP EDX,0x80000000\n\t"
11883 11910 "JNE,s fast\n\t"
11884 11911 "TEST EAX,EAX\n\t"
11885 11912 "JNE,s fast\n\t"
11886 11913 "FLD $src\n\t"
11887 11914 "CALL d2l_wrapper\n"
11888 11915 "fast:" %}
11889 11916 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11890 11917 ins_pipe( pipe_slow );
11891 11918 %}
11892 11919
11893 11920 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11894 11921 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11895 11922 predicate (UseSSE>=2);
11896 11923 match(Set dst (ConvD2L src));
11897 11924 effect( KILL cr );
11898 11925 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11899 11926 "MOVSD [ESP],$src\n\t"
11900 11927 "FLD_D [ESP]\n\t"
11901 11928 "FLDCW trunc mode\n\t"
11902 11929 "FISTp [ESP + #0]\n\t"
11903 11930 "FLDCW std/24-bit mode\n\t"
11904 11931 "POP EAX\n\t"
11905 11932 "POP EDX\n\t"
11906 11933 "CMP EDX,0x80000000\n\t"
11907 11934 "JNE,s fast\n\t"
11908 11935 "TEST EAX,EAX\n\t"
11909 11936 "JNE,s fast\n\t"
11910 11937 "SUB ESP,8\n\t"
11911 11938 "MOVSD [ESP],$src\n\t"
11912 11939 "FLD_D [ESP]\n\t"
11913 11940 "CALL d2l_wrapper\n"
11914 11941 "fast:" %}
11915 11942 ins_encode( XD2L_encoding(src) );
11916 11943 ins_pipe( pipe_slow );
11917 11944 %}
11918 11945
11919 11946 // Convert a double to an int. Java semantics require we do complex
11920 11947 // manglations in the corner cases. So we set the rounding mode to
11921 11948 // 'zero', store the darned double down as an int, and reset the
11922 11949 // rounding mode to 'nearest'. The hardware stores a flag value down
11923 11950 // if we would overflow or converted a NAN; we check for this and
11924 11951 // and go the slow path if needed.
11925 11952 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11926 11953 predicate(UseSSE==0);
11927 11954 match(Set dst (ConvF2I src));
11928 11955 effect( KILL tmp, KILL cr );
11929 11956 format %{ "FLD $src\t# Convert float to int \n\t"
11930 11957 "FLDCW trunc mode\n\t"
11931 11958 "SUB ESP,4\n\t"
11932 11959 "FISTp [ESP + #0]\n\t"
11933 11960 "FLDCW std/24-bit mode\n\t"
11934 11961 "POP EAX\n\t"
11935 11962 "CMP EAX,0x80000000\n\t"
11936 11963 "JNE,s fast\n\t"
11937 11964 "FLD $src\n\t"
11938 11965 "CALL d2i_wrapper\n"
11939 11966 "fast:" %}
11940 11967 // D2I_encoding works for F2I
11941 11968 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11942 11969 ins_pipe( pipe_slow );
11943 11970 %}
11944 11971
11945 11972 // Convert a float in xmm to an int reg.
11946 11973 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11947 11974 predicate(UseSSE>=1);
11948 11975 match(Set dst (ConvF2I src));
11949 11976 effect( KILL tmp, KILL cr );
11950 11977 format %{ "CVTTSS2SI $dst, $src\n\t"
11951 11978 "CMP $dst,0x80000000\n\t"
11952 11979 "JNE,s fast\n\t"
11953 11980 "SUB ESP, 4\n\t"
11954 11981 "MOVSS [ESP], $src\n\t"
11955 11982 "FLD [ESP]\n\t"
11956 11983 "ADD ESP, 4\n\t"
11957 11984 "CALL d2i_wrapper\n"
11958 11985 "fast:" %}
11959 11986 opcode(0x0); // single-precision conversion
11960 11987 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11961 11988 ins_pipe( pipe_slow );
11962 11989 %}
11963 11990
11964 11991 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11965 11992 predicate(UseSSE==0);
11966 11993 match(Set dst (ConvF2L src));
11967 11994 effect( KILL cr );
11968 11995 format %{ "FLD $src\t# Convert float to long\n\t"
11969 11996 "FLDCW trunc mode\n\t"
11970 11997 "SUB ESP,8\n\t"
11971 11998 "FISTp [ESP + #0]\n\t"
11972 11999 "FLDCW std/24-bit mode\n\t"
11973 12000 "POP EAX\n\t"
11974 12001 "POP EDX\n\t"
11975 12002 "CMP EDX,0x80000000\n\t"
11976 12003 "JNE,s fast\n\t"
11977 12004 "TEST EAX,EAX\n\t"
11978 12005 "JNE,s fast\n\t"
11979 12006 "FLD $src\n\t"
11980 12007 "CALL d2l_wrapper\n"
11981 12008 "fast:" %}
11982 12009 // D2L_encoding works for F2L
11983 12010 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11984 12011 ins_pipe( pipe_slow );
11985 12012 %}
11986 12013
11987 12014 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11988 12015 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11989 12016 predicate (UseSSE>=1);
11990 12017 match(Set dst (ConvF2L src));
11991 12018 effect( KILL cr );
11992 12019 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11993 12020 "MOVSS [ESP],$src\n\t"
11994 12021 "FLD_S [ESP]\n\t"
11995 12022 "FLDCW trunc mode\n\t"
11996 12023 "FISTp [ESP + #0]\n\t"
11997 12024 "FLDCW std/24-bit mode\n\t"
11998 12025 "POP EAX\n\t"
11999 12026 "POP EDX\n\t"
12000 12027 "CMP EDX,0x80000000\n\t"
12001 12028 "JNE,s fast\n\t"
12002 12029 "TEST EAX,EAX\n\t"
12003 12030 "JNE,s fast\n\t"
12004 12031 "SUB ESP,4\t# Convert float to long\n\t"
12005 12032 "MOVSS [ESP],$src\n\t"
12006 12033 "FLD_S [ESP]\n\t"
12007 12034 "ADD ESP,4\n\t"
12008 12035 "CALL d2l_wrapper\n"
12009 12036 "fast:" %}
12010 12037 ins_encode( X2L_encoding(src) );
12011 12038 ins_pipe( pipe_slow );
12012 12039 %}
12013 12040
12014 12041 instruct convI2D_reg(regD dst, stackSlotI src) %{
12015 12042 predicate( UseSSE<=1 );
12016 12043 match(Set dst (ConvI2D src));
12017 12044 format %{ "FILD $src\n\t"
12018 12045 "FSTP $dst" %}
12019 12046 opcode(0xDB, 0x0); /* DB /0 */
12020 12047 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
12021 12048 ins_pipe( fpu_reg_mem );
12022 12049 %}
12023 12050
12024 12051 instruct convI2XD_reg(regXD dst, eRegI src) %{
12025 12052 predicate( UseSSE>=2 && !UseXmmI2D );
12026 12053 match(Set dst (ConvI2D src));
12027 12054 format %{ "CVTSI2SD $dst,$src" %}
12028 12055 opcode(0xF2, 0x0F, 0x2A);
12029 12056 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12030 12057 ins_pipe( pipe_slow );
12031 12058 %}
12032 12059
12033 12060 instruct convI2XD_mem(regXD dst, memory mem) %{
12034 12061 predicate( UseSSE>=2 );
12035 12062 match(Set dst (ConvI2D (LoadI mem)));
12036 12063 format %{ "CVTSI2SD $dst,$mem" %}
12037 12064 opcode(0xF2, 0x0F, 0x2A);
12038 12065 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
12039 12066 ins_pipe( pipe_slow );
12040 12067 %}
12041 12068
12042 12069 instruct convXI2XD_reg(regXD dst, eRegI src)
12043 12070 %{
12044 12071 predicate( UseSSE>=2 && UseXmmI2D );
12045 12072 match(Set dst (ConvI2D src));
12046 12073
12047 12074 format %{ "MOVD $dst,$src\n\t"
12048 12075 "CVTDQ2PD $dst,$dst\t# i2d" %}
12049 12076 ins_encode %{
12050 12077 __ movdl($dst$$XMMRegister, $src$$Register);
12051 12078 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12052 12079 %}
12053 12080 ins_pipe(pipe_slow); // XXX
12054 12081 %}
12055 12082
12056 12083 instruct convI2D_mem(regD dst, memory mem) %{
12057 12084 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12058 12085 match(Set dst (ConvI2D (LoadI mem)));
12059 12086 format %{ "FILD $mem\n\t"
12060 12087 "FSTP $dst" %}
12061 12088 opcode(0xDB); /* DB /0 */
12062 12089 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12063 12090 Pop_Reg_D(dst));
12064 12091 ins_pipe( fpu_reg_mem );
12065 12092 %}
12066 12093
12067 12094 // Convert a byte to a float; no rounding step needed.
12068 12095 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
12069 12096 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12070 12097 match(Set dst (ConvI2F src));
12071 12098 format %{ "FILD $src\n\t"
12072 12099 "FSTP $dst" %}
12073 12100
12074 12101 opcode(0xDB, 0x0); /* DB /0 */
12075 12102 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
12076 12103 ins_pipe( fpu_reg_mem );
12077 12104 %}
12078 12105
12079 12106 // In 24-bit mode, force exponent rounding by storing back out
12080 12107 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
12081 12108 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12082 12109 match(Set dst (ConvI2F src));
12083 12110 ins_cost(200);
12084 12111 format %{ "FILD $src\n\t"
12085 12112 "FSTP_S $dst" %}
12086 12113 opcode(0xDB, 0x0); /* DB /0 */
12087 12114 ins_encode( Push_Mem_I(src),
12088 12115 Pop_Mem_F(dst));
12089 12116 ins_pipe( fpu_mem_mem );
12090 12117 %}
12091 12118
12092 12119 // In 24-bit mode, force exponent rounding by storing back out
12093 12120 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
12094 12121 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12095 12122 match(Set dst (ConvI2F (LoadI mem)));
12096 12123 ins_cost(200);
12097 12124 format %{ "FILD $mem\n\t"
12098 12125 "FSTP_S $dst" %}
12099 12126 opcode(0xDB); /* DB /0 */
12100 12127 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12101 12128 Pop_Mem_F(dst));
12102 12129 ins_pipe( fpu_mem_mem );
12103 12130 %}
12104 12131
12105 12132 // This instruction does not round to 24-bits
12106 12133 instruct convI2F_reg(regF dst, stackSlotI src) %{
12107 12134 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12108 12135 match(Set dst (ConvI2F src));
12109 12136 format %{ "FILD $src\n\t"
12110 12137 "FSTP $dst" %}
12111 12138 opcode(0xDB, 0x0); /* DB /0 */
12112 12139 ins_encode( Push_Mem_I(src),
12113 12140 Pop_Reg_F(dst));
12114 12141 ins_pipe( fpu_reg_mem );
12115 12142 %}
12116 12143
12117 12144 // This instruction does not round to 24-bits
12118 12145 instruct convI2F_mem(regF dst, memory mem) %{
12119 12146 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12120 12147 match(Set dst (ConvI2F (LoadI mem)));
12121 12148 format %{ "FILD $mem\n\t"
12122 12149 "FSTP $dst" %}
12123 12150 opcode(0xDB); /* DB /0 */
12124 12151 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12125 12152 Pop_Reg_F(dst));
12126 12153 ins_pipe( fpu_reg_mem );
12127 12154 %}
12128 12155
12129 12156 // Convert an int to a float in xmm; no rounding step needed.
12130 12157 instruct convI2X_reg(regX dst, eRegI src) %{
12131 12158 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
12132 12159 match(Set dst (ConvI2F src));
12133 12160 format %{ "CVTSI2SS $dst, $src" %}
12134 12161
12135 12162 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
12136 12163 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12137 12164 ins_pipe( pipe_slow );
12138 12165 %}
12139 12166
12140 12167 instruct convXI2X_reg(regX dst, eRegI src)
12141 12168 %{
12142 12169 predicate( UseSSE>=2 && UseXmmI2F );
12143 12170 match(Set dst (ConvI2F src));
12144 12171
12145 12172 format %{ "MOVD $dst,$src\n\t"
12146 12173 "CVTDQ2PS $dst,$dst\t# i2f" %}
12147 12174 ins_encode %{
12148 12175 __ movdl($dst$$XMMRegister, $src$$Register);
12149 12176 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
12150 12177 %}
12151 12178 ins_pipe(pipe_slow); // XXX
12152 12179 %}
12153 12180
12154 12181 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12155 12182 match(Set dst (ConvI2L src));
12156 12183 effect(KILL cr);
12157 12184 ins_cost(375);
12158 12185 format %{ "MOV $dst.lo,$src\n\t"
12159 12186 "MOV $dst.hi,$src\n\t"
12160 12187 "SAR $dst.hi,31" %}
12161 12188 ins_encode(convert_int_long(dst,src));
12162 12189 ins_pipe( ialu_reg_reg_long );
12163 12190 %}
12164 12191
12165 12192 // Zero-extend convert int to long
12166 12193 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
12167 12194 match(Set dst (AndL (ConvI2L src) mask) );
12168 12195 effect( KILL flags );
12169 12196 ins_cost(250);
12170 12197 format %{ "MOV $dst.lo,$src\n\t"
12171 12198 "XOR $dst.hi,$dst.hi" %}
12172 12199 opcode(0x33); // XOR
12173 12200 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12174 12201 ins_pipe( ialu_reg_reg_long );
12175 12202 %}
12176 12203
12177 12204 // Zero-extend long
12178 12205 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
12179 12206 match(Set dst (AndL src mask) );
12180 12207 effect( KILL flags );
12181 12208 ins_cost(250);
12182 12209 format %{ "MOV $dst.lo,$src.lo\n\t"
12183 12210 "XOR $dst.hi,$dst.hi\n\t" %}
12184 12211 opcode(0x33); // XOR
12185 12212 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12186 12213 ins_pipe( ialu_reg_reg_long );
12187 12214 %}
12188 12215
12189 12216 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
12190 12217 predicate (UseSSE<=1);
12191 12218 match(Set dst (ConvL2D src));
12192 12219 effect( KILL cr );
12193 12220 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12194 12221 "PUSH $src.lo\n\t"
12195 12222 "FILD ST,[ESP + #0]\n\t"
12196 12223 "ADD ESP,8\n\t"
12197 12224 "FSTP_D $dst\t# D-round" %}
12198 12225 opcode(0xDF, 0x5); /* DF /5 */
12199 12226 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
12200 12227 ins_pipe( pipe_slow );
12201 12228 %}
12202 12229
12203 12230 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
12204 12231 predicate (UseSSE>=2);
12205 12232 match(Set dst (ConvL2D src));
12206 12233 effect( KILL cr );
12207 12234 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12208 12235 "PUSH $src.lo\n\t"
12209 12236 "FILD_D [ESP]\n\t"
12210 12237 "FSTP_D [ESP]\n\t"
12211 12238 "MOVSD $dst,[ESP]\n\t"
12212 12239 "ADD ESP,8" %}
12213 12240 opcode(0xDF, 0x5); /* DF /5 */
12214 12241 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
12215 12242 ins_pipe( pipe_slow );
12216 12243 %}
12217 12244
12218 12245 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
12219 12246 predicate (UseSSE>=1);
12220 12247 match(Set dst (ConvL2F src));
12221 12248 effect( KILL cr );
12222 12249 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12223 12250 "PUSH $src.lo\n\t"
12224 12251 "FILD_D [ESP]\n\t"
12225 12252 "FSTP_S [ESP]\n\t"
12226 12253 "MOVSS $dst,[ESP]\n\t"
12227 12254 "ADD ESP,8" %}
12228 12255 opcode(0xDF, 0x5); /* DF /5 */
12229 12256 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
12230 12257 ins_pipe( pipe_slow );
12231 12258 %}
12232 12259
12233 12260 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
12234 12261 match(Set dst (ConvL2F src));
12235 12262 effect( KILL cr );
12236 12263 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12237 12264 "PUSH $src.lo\n\t"
12238 12265 "FILD ST,[ESP + #0]\n\t"
12239 12266 "ADD ESP,8\n\t"
12240 12267 "FSTP_S $dst\t# F-round" %}
12241 12268 opcode(0xDF, 0x5); /* DF /5 */
12242 12269 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12243 12270 ins_pipe( pipe_slow );
12244 12271 %}
12245 12272
12246 12273 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12247 12274 match(Set dst (ConvL2I src));
12248 12275 effect( DEF dst, USE src );
12249 12276 format %{ "MOV $dst,$src.lo" %}
12250 12277 ins_encode(enc_CopyL_Lo(dst,src));
12251 12278 ins_pipe( ialu_reg_reg );
12252 12279 %}
12253 12280
12254 12281
12255 12282 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12256 12283 match(Set dst (MoveF2I src));
12257 12284 effect( DEF dst, USE src );
12258 12285 ins_cost(100);
12259 12286 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12260 12287 opcode(0x8B);
12261 12288 ins_encode( OpcP, RegMem(dst,src));
12262 12289 ins_pipe( ialu_reg_mem );
12263 12290 %}
12264 12291
12265 12292 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12266 12293 predicate(UseSSE==0);
12267 12294 match(Set dst (MoveF2I src));
12268 12295 effect( DEF dst, USE src );
12269 12296
12270 12297 ins_cost(125);
12271 12298 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12272 12299 ins_encode( Pop_Mem_Reg_F(dst, src) );
12273 12300 ins_pipe( fpu_mem_reg );
12274 12301 %}
12275 12302
12276 12303 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12277 12304 predicate(UseSSE>=1);
12278 12305 match(Set dst (MoveF2I src));
12279 12306 effect( DEF dst, USE src );
12280 12307
12281 12308 ins_cost(95);
12282 12309 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12283 12310 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
12284 12311 ins_pipe( pipe_slow );
12285 12312 %}
12286 12313
12287 12314 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12288 12315 predicate(UseSSE>=2);
12289 12316 match(Set dst (MoveF2I src));
12290 12317 effect( DEF dst, USE src );
12291 12318 ins_cost(85);
12292 12319 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12293 12320 ins_encode( MovX2I_reg(dst, src));
12294 12321 ins_pipe( pipe_slow );
12295 12322 %}
12296 12323
12297 12324 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12298 12325 match(Set dst (MoveI2F src));
12299 12326 effect( DEF dst, USE src );
12300 12327
12301 12328 ins_cost(100);
12302 12329 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12303 12330 opcode(0x89);
12304 12331 ins_encode( OpcPRegSS( dst, src ) );
12305 12332 ins_pipe( ialu_mem_reg );
12306 12333 %}
12307 12334
12308 12335
12309 12336 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12310 12337 predicate(UseSSE==0);
12311 12338 match(Set dst (MoveI2F src));
12312 12339 effect(DEF dst, USE src);
12313 12340
12314 12341 ins_cost(125);
12315 12342 format %{ "FLD_S $src\n\t"
12316 12343 "FSTP $dst\t# MoveI2F_stack_reg" %}
12317 12344 opcode(0xD9); /* D9 /0, FLD m32real */
12318 12345 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12319 12346 Pop_Reg_F(dst) );
12320 12347 ins_pipe( fpu_reg_mem );
12321 12348 %}
12322 12349
12323 12350 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12324 12351 predicate(UseSSE>=1);
12325 12352 match(Set dst (MoveI2F src));
12326 12353 effect( DEF dst, USE src );
12327 12354
12328 12355 ins_cost(95);
12329 12356 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12330 12357 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12331 12358 ins_pipe( pipe_slow );
12332 12359 %}
12333 12360
12334 12361 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12335 12362 predicate(UseSSE>=2);
12336 12363 match(Set dst (MoveI2F src));
12337 12364 effect( DEF dst, USE src );
12338 12365
12339 12366 ins_cost(85);
12340 12367 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12341 12368 ins_encode( MovI2X_reg(dst, src) );
12342 12369 ins_pipe( pipe_slow );
12343 12370 %}
12344 12371
12345 12372 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12346 12373 match(Set dst (MoveD2L src));
12347 12374 effect(DEF dst, USE src);
12348 12375
12349 12376 ins_cost(250);
12350 12377 format %{ "MOV $dst.lo,$src\n\t"
12351 12378 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12352 12379 opcode(0x8B, 0x8B);
12353 12380 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12354 12381 ins_pipe( ialu_mem_long_reg );
12355 12382 %}
12356 12383
12357 12384 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12358 12385 predicate(UseSSE<=1);
12359 12386 match(Set dst (MoveD2L src));
12360 12387 effect(DEF dst, USE src);
12361 12388
12362 12389 ins_cost(125);
12363 12390 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12364 12391 ins_encode( Pop_Mem_Reg_D(dst, src) );
12365 12392 ins_pipe( fpu_mem_reg );
12366 12393 %}
12367 12394
12368 12395 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12369 12396 predicate(UseSSE>=2);
12370 12397 match(Set dst (MoveD2L src));
12371 12398 effect(DEF dst, USE src);
12372 12399 ins_cost(95);
12373 12400
12374 12401 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12375 12402 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12376 12403 ins_pipe( pipe_slow );
12377 12404 %}
12378 12405
12379 12406 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12380 12407 predicate(UseSSE>=2);
12381 12408 match(Set dst (MoveD2L src));
12382 12409 effect(DEF dst, USE src, TEMP tmp);
12383 12410 ins_cost(85);
12384 12411 format %{ "MOVD $dst.lo,$src\n\t"
12385 12412 "PSHUFLW $tmp,$src,0x4E\n\t"
12386 12413 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12387 12414 ins_encode( MovXD2L_reg(dst, src, tmp) );
12388 12415 ins_pipe( pipe_slow );
12389 12416 %}
12390 12417
12391 12418 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12392 12419 match(Set dst (MoveL2D src));
12393 12420 effect(DEF dst, USE src);
12394 12421
12395 12422 ins_cost(200);
12396 12423 format %{ "MOV $dst,$src.lo\n\t"
12397 12424 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12398 12425 opcode(0x89, 0x89);
12399 12426 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12400 12427 ins_pipe( ialu_mem_long_reg );
12401 12428 %}
12402 12429
12403 12430
12404 12431 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12405 12432 predicate(UseSSE<=1);
12406 12433 match(Set dst (MoveL2D src));
12407 12434 effect(DEF dst, USE src);
12408 12435 ins_cost(125);
12409 12436
12410 12437 format %{ "FLD_D $src\n\t"
12411 12438 "FSTP $dst\t# MoveL2D_stack_reg" %}
12412 12439 opcode(0xDD); /* DD /0, FLD m64real */
12413 12440 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12414 12441 Pop_Reg_D(dst) );
12415 12442 ins_pipe( fpu_reg_mem );
12416 12443 %}
12417 12444
12418 12445
12419 12446 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12420 12447 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12421 12448 match(Set dst (MoveL2D src));
12422 12449 effect(DEF dst, USE src);
12423 12450
12424 12451 ins_cost(95);
12425 12452 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12426 12453 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12427 12454 ins_pipe( pipe_slow );
12428 12455 %}
12429 12456
12430 12457 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12431 12458 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12432 12459 match(Set dst (MoveL2D src));
12433 12460 effect(DEF dst, USE src);
12434 12461
12435 12462 ins_cost(95);
12436 12463 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12437 12464 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12438 12465 ins_pipe( pipe_slow );
12439 12466 %}
12440 12467
12441 12468 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12442 12469 predicate(UseSSE>=2);
12443 12470 match(Set dst (MoveL2D src));
12444 12471 effect(TEMP dst, USE src, TEMP tmp);
12445 12472 ins_cost(85);
12446 12473 format %{ "MOVD $dst,$src.lo\n\t"
12447 12474 "MOVD $tmp,$src.hi\n\t"
12448 12475 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12449 12476 ins_encode( MovL2XD_reg(dst, src, tmp) );
12450 12477 ins_pipe( pipe_slow );
12451 12478 %}
12452 12479
12453 12480 // Replicate scalar to packed byte (1 byte) values in xmm
12454 12481 instruct Repl8B_reg(regXD dst, regXD src) %{
12455 12482 predicate(UseSSE>=2);
12456 12483 match(Set dst (Replicate8B src));
12457 12484 format %{ "MOVDQA $dst,$src\n\t"
12458 12485 "PUNPCKLBW $dst,$dst\n\t"
12459 12486 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12460 12487 ins_encode( pshufd_8x8(dst, src));
12461 12488 ins_pipe( pipe_slow );
12462 12489 %}
12463 12490
12464 12491 // Replicate scalar to packed byte (1 byte) values in xmm
12465 12492 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12466 12493 predicate(UseSSE>=2);
12467 12494 match(Set dst (Replicate8B src));
12468 12495 format %{ "MOVD $dst,$src\n\t"
12469 12496 "PUNPCKLBW $dst,$dst\n\t"
12470 12497 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12471 12498 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12472 12499 ins_pipe( pipe_slow );
12473 12500 %}
12474 12501
12475 12502 // Replicate scalar zero to packed byte (1 byte) values in xmm
12476 12503 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12477 12504 predicate(UseSSE>=2);
12478 12505 match(Set dst (Replicate8B zero));
12479 12506 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12480 12507 ins_encode( pxor(dst, dst));
12481 12508 ins_pipe( fpu_reg_reg );
12482 12509 %}
12483 12510
12484 12511  // Replicate scalar to packed short (2 byte) values in xmm
12485 12512 instruct Repl4S_reg(regXD dst, regXD src) %{
12486 12513 predicate(UseSSE>=2);
12487 12514 match(Set dst (Replicate4S src));
12488 12515 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12489 12516 ins_encode( pshufd_4x16(dst, src));
12490 12517 ins_pipe( fpu_reg_reg );
12491 12518 %}
12492 12519
12493 12520  // Replicate scalar to packed short (2 byte) values in xmm
12494 12521 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12495 12522 predicate(UseSSE>=2);
12496 12523 match(Set dst (Replicate4S src));
12497 12524 format %{ "MOVD $dst,$src\n\t"
12498 12525 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12499 12526 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12500 12527 ins_pipe( fpu_reg_reg );
12501 12528 %}
12502 12529
12503 12530 // Replicate scalar zero to packed short (2 byte) values in xmm
12504 12531 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12505 12532 predicate(UseSSE>=2);
12506 12533 match(Set dst (Replicate4S zero));
12507 12534 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12508 12535 ins_encode( pxor(dst, dst));
12509 12536 ins_pipe( fpu_reg_reg );
12510 12537 %}
12511 12538
12512 12539 // Replicate scalar to packed char (2 byte) values in xmm
12513 12540 instruct Repl4C_reg(regXD dst, regXD src) %{
12514 12541 predicate(UseSSE>=2);
12515 12542 match(Set dst (Replicate4C src));
12516 12543 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12517 12544 ins_encode( pshufd_4x16(dst, src));
12518 12545 ins_pipe( fpu_reg_reg );
12519 12546 %}
12520 12547
12521 12548 // Replicate scalar to packed char (2 byte) values in xmm
12522 12549 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12523 12550 predicate(UseSSE>=2);
12524 12551 match(Set dst (Replicate4C src));
12525 12552 format %{ "MOVD $dst,$src\n\t"
12526 12553 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12527 12554 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12528 12555 ins_pipe( fpu_reg_reg );
12529 12556 %}
12530 12557
12531 12558 // Replicate scalar zero to packed char (2 byte) values in xmm
12532 12559 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12533 12560 predicate(UseSSE>=2);
12534 12561 match(Set dst (Replicate4C zero));
12535 12562 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12536 12563 ins_encode( pxor(dst, dst));
12537 12564 ins_pipe( fpu_reg_reg );
12538 12565 %}
12539 12566
12540 12567 // Replicate scalar to packed integer (4 byte) values in xmm
12541 12568 instruct Repl2I_reg(regXD dst, regXD src) %{
12542 12569 predicate(UseSSE>=2);
12543 12570 match(Set dst (Replicate2I src));
12544 12571 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12545 12572 ins_encode( pshufd(dst, src, 0x00));
12546 12573 ins_pipe( fpu_reg_reg );
12547 12574 %}
12548 12575
12549 12576 // Replicate scalar to packed integer (4 byte) values in xmm
12550 12577 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12551 12578 predicate(UseSSE>=2);
12552 12579 match(Set dst (Replicate2I src));
12553 12580 format %{ "MOVD $dst,$src\n\t"
12554 12581 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12555 12582 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12556 12583 ins_pipe( fpu_reg_reg );
12557 12584 %}
12558 12585
12559 12586  // Replicate scalar zero to packed integer (4 byte) values in xmm
12560 12587 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12561 12588 predicate(UseSSE>=2);
12562 12589 match(Set dst (Replicate2I zero));
12563 12590 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12564 12591 ins_encode( pxor(dst, dst));
12565 12592 ins_pipe( fpu_reg_reg );
12566 12593 %}
12567 12594
12568 12595 // Replicate scalar to packed single precision floating point values in xmm
12569 12596 instruct Repl2F_reg(regXD dst, regXD src) %{
12570 12597 predicate(UseSSE>=2);
12571 12598 match(Set dst (Replicate2F src));
12572 12599 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12573 12600 ins_encode( pshufd(dst, src, 0xe0));
12574 12601 ins_pipe( fpu_reg_reg );
12575 12602 %}
12576 12603
12577 12604 // Replicate scalar to packed single precision floating point values in xmm
12578 12605 instruct Repl2F_regX(regXD dst, regX src) %{
12579 12606 predicate(UseSSE>=2);
12580 12607 match(Set dst (Replicate2F src));
12581 12608 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12582 12609 ins_encode( pshufd(dst, src, 0xe0));
12583 12610 ins_pipe( fpu_reg_reg );
12584 12611 %}
12585 12612
12586 12613 // Replicate scalar to packed single precision floating point values in xmm
12587 12614 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12588 12615 predicate(UseSSE>=2);
12589 12616 match(Set dst (Replicate2F zero));
12590 12617 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12591 12618 ins_encode( pxor(dst, dst));
12592 12619 ins_pipe( fpu_reg_reg );
12593 12620 %}
12594 12621
12595 12622 // =======================================================================
12596 12623 // fast clearing of an array
12597 12624 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12598 12625 match(Set dummy (ClearArray cnt base));
12599 12626 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12600 12627 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12601 12628 "XOR EAX,EAX\n\t"
12602 12629 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12603 12630 opcode(0,0x4);
12604 12631 ins_encode( Opcode(0xD1), RegOpc(ECX),
12605 12632 OpcRegReg(0x33,EAX,EAX),
12606 12633 Opcode(0xF3), Opcode(0xAB) );
12607 12634 ins_pipe( pipe_slow );
12608 12635 %}
12609 12636
12610 12637 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12611 12638 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12612 12639 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12613 12640 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12614 12641
12615 12642 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12616 12643 ins_encode %{
12617 12644 __ string_compare($str1$$Register, $str2$$Register,
12618 12645 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12619 12646 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12620 12647 %}
12621 12648 ins_pipe( pipe_slow );
12622 12649 %}
12623 12650
12624 12651 // fast string equals
12625 12652 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12626 12653 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12627 12654 match(Set result (StrEquals (Binary str1 str2) cnt));
12628 12655 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12629 12656
12630 12657 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12631 12658 ins_encode %{
12632 12659 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12633 12660 $cnt$$Register, $result$$Register, $tmp3$$Register,
12634 12661 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12635 12662 %}
12636 12663 ins_pipe( pipe_slow );
12637 12664 %}
12638 12665
12639 12666 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12640 12667 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12641 12668 predicate(UseSSE42Intrinsics);
12642 12669 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12643 12670 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12644 12671
12645 12672 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12646 12673 ins_encode %{
12647 12674 __ string_indexof($str1$$Register, $str2$$Register,
12648 12675 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12649 12676 $tmp1$$XMMRegister, $tmp2$$Register);
12650 12677 %}
12651 12678 ins_pipe( pipe_slow );
12652 12679 %}
12653 12680
12654 12681 // fast array equals
12655 12682 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12656 12683 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12657 12684 %{
12658 12685 match(Set result (AryEq ary1 ary2));
12659 12686 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12660 12687 //ins_cost(300);
12661 12688
12662 12689 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12663 12690 ins_encode %{
12664 12691 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12665 12692 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12666 12693 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12667 12694 %}
12668 12695 ins_pipe( pipe_slow );
12669 12696 %}
12670 12697
12671 12698 //----------Control Flow Instructions------------------------------------------
12672 12699 // Signed compare Instructions
12673 12700 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12674 12701 match(Set cr (CmpI op1 op2));
12675 12702 effect( DEF cr, USE op1, USE op2 );
12676 12703 format %{ "CMP $op1,$op2" %}
12677 12704 opcode(0x3B); /* Opcode 3B /r */
12678 12705 ins_encode( OpcP, RegReg( op1, op2) );
12679 12706 ins_pipe( ialu_cr_reg_reg );
12680 12707 %}
12681 12708
12682 12709 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12683 12710 match(Set cr (CmpI op1 op2));
12684 12711 effect( DEF cr, USE op1 );
12685 12712 format %{ "CMP $op1,$op2" %}
12686 12713 opcode(0x81,0x07); /* Opcode 81 /7 */
12687 12714 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12688 12715 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12689 12716 ins_pipe( ialu_cr_reg_imm );
12690 12717 %}
12691 12718
12692 12719 // Cisc-spilled version of cmpI_eReg
12693 12720 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12694 12721 match(Set cr (CmpI op1 (LoadI op2)));
12695 12722
12696 12723 format %{ "CMP $op1,$op2" %}
12697 12724 ins_cost(500);
12698 12725 opcode(0x3B); /* Opcode 3B /r */
12699 12726 ins_encode( OpcP, RegMem( op1, op2) );
12700 12727 ins_pipe( ialu_cr_reg_mem );
12701 12728 %}
12702 12729
12703 12730 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12704 12731 match(Set cr (CmpI src zero));
12705 12732 effect( DEF cr, USE src );
12706 12733
12707 12734 format %{ "TEST $src,$src" %}
12708 12735 opcode(0x85);
12709 12736 ins_encode( OpcP, RegReg( src, src ) );
12710 12737 ins_pipe( ialu_cr_reg_imm );
12711 12738 %}
12712 12739
12713 12740 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12714 12741 match(Set cr (CmpI (AndI src con) zero));
12715 12742
12716 12743 format %{ "TEST $src,$con" %}
12717 12744 opcode(0xF7,0x00);
12718 12745 ins_encode( OpcP, RegOpc(src), Con32(con) );
12719 12746 ins_pipe( ialu_cr_reg_imm );
12720 12747 %}
12721 12748
12722 12749 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12723 12750 match(Set cr (CmpI (AndI src mem) zero));
12724 12751
12725 12752 format %{ "TEST $src,$mem" %}
12726 12753 opcode(0x85);
12727 12754 ins_encode( OpcP, RegMem( src, mem ) );
12728 12755 ins_pipe( ialu_cr_reg_mem );
12729 12756 %}
12730 12757
12731 12758 // Unsigned compare Instructions; really, same as signed except they
12732 12759 // produce an eFlagsRegU instead of eFlagsReg.
12733 12760 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12734 12761 match(Set cr (CmpU op1 op2));
12735 12762
12736 12763 format %{ "CMPu $op1,$op2" %}
12737 12764 opcode(0x3B); /* Opcode 3B /r */
12738 12765 ins_encode( OpcP, RegReg( op1, op2) );
12739 12766 ins_pipe( ialu_cr_reg_reg );
12740 12767 %}
12741 12768
12742 12769 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12743 12770 match(Set cr (CmpU op1 op2));
12744 12771
12745 12772 format %{ "CMPu $op1,$op2" %}
12746 12773 opcode(0x81,0x07); /* Opcode 81 /7 */
12747 12774 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12748 12775 ins_pipe( ialu_cr_reg_imm );
12749 12776 %}
12750 12777
12751 12778 // // Cisc-spilled version of cmpU_eReg
12752 12779 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12753 12780 match(Set cr (CmpU op1 (LoadI op2)));
12754 12781
12755 12782 format %{ "CMPu $op1,$op2" %}
12756 12783 ins_cost(500);
12757 12784 opcode(0x3B); /* Opcode 3B /r */
12758 12785 ins_encode( OpcP, RegMem( op1, op2) );
12759 12786 ins_pipe( ialu_cr_reg_mem );
12760 12787 %}
12761 12788
12762 12789 // // Cisc-spilled version of cmpU_eReg
12763 12790 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12764 12791 // match(Set cr (CmpU (LoadI op1) op2));
12765 12792 //
12766 12793 // format %{ "CMPu $op1,$op2" %}
12767 12794 // ins_cost(500);
12768 12795 // opcode(0x39); /* Opcode 39 /r */
12769 12796 // ins_encode( OpcP, RegMem( op1, op2) );
12770 12797 //%}
12771 12798
12772 12799 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12773 12800 match(Set cr (CmpU src zero));
12774 12801
12775 12802 format %{ "TESTu $src,$src" %}
12776 12803 opcode(0x85);
12777 12804 ins_encode( OpcP, RegReg( src, src ) );
12778 12805 ins_pipe( ialu_cr_reg_imm );
12779 12806 %}
12780 12807
12781 12808 // Unsigned pointer compare Instructions
12782 12809 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12783 12810 match(Set cr (CmpP op1 op2));
12784 12811
12785 12812 format %{ "CMPu $op1,$op2" %}
12786 12813 opcode(0x3B); /* Opcode 3B /r */
12787 12814 ins_encode( OpcP, RegReg( op1, op2) );
12788 12815 ins_pipe( ialu_cr_reg_reg );
12789 12816 %}
12790 12817
12791 12818 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12792 12819 match(Set cr (CmpP op1 op2));
12793 12820
12794 12821 format %{ "CMPu $op1,$op2" %}
12795 12822 opcode(0x81,0x07); /* Opcode 81 /7 */
12796 12823 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12797 12824 ins_pipe( ialu_cr_reg_imm );
12798 12825 %}
12799 12826
12800 12827 // // Cisc-spilled version of cmpP_eReg
12801 12828 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12802 12829 match(Set cr (CmpP op1 (LoadP op2)));
12803 12830
12804 12831 format %{ "CMPu $op1,$op2" %}
12805 12832 ins_cost(500);
12806 12833 opcode(0x3B); /* Opcode 3B /r */
12807 12834 ins_encode( OpcP, RegMem( op1, op2) );
12808 12835 ins_pipe( ialu_cr_reg_mem );
12809 12836 %}
12810 12837
12811 12838 // // Cisc-spilled version of cmpP_eReg
12812 12839 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12813 12840 // match(Set cr (CmpP (LoadP op1) op2));
12814 12841 //
12815 12842 // format %{ "CMPu $op1,$op2" %}
12816 12843 // ins_cost(500);
12817 12844 // opcode(0x39); /* Opcode 39 /r */
12818 12845 // ins_encode( OpcP, RegMem( op1, op2) );
12819 12846 //%}
12820 12847
12821 12848 // Compare raw pointer (used in out-of-heap check).
12822 12849 // Only works because non-oop pointers must be raw pointers
12823 12850 // and raw pointers have no anti-dependencies.
12824 12851 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12825 12852 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12826 12853 match(Set cr (CmpP op1 (LoadP op2)));
12827 12854
12828 12855 format %{ "CMPu $op1,$op2" %}
12829 12856 opcode(0x3B); /* Opcode 3B /r */
12830 12857 ins_encode( OpcP, RegMem( op1, op2) );
12831 12858 ins_pipe( ialu_cr_reg_mem );
12832 12859 %}
12833 12860
12834 12861 //
12835 12862 // This will generate a signed flags result. This should be ok
12836 12863 // since any compare to a zero should be eq/neq.
12837 12864 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12838 12865 match(Set cr (CmpP src zero));
12839 12866
12840 12867 format %{ "TEST $src,$src" %}
12841 12868 opcode(0x85);
12842 12869 ins_encode( OpcP, RegReg( src, src ) );
12843 12870 ins_pipe( ialu_cr_reg_imm );
12844 12871 %}
12845 12872
12846 12873 // Cisc-spilled version of testP_reg
12847 12874 // This will generate a signed flags result. This should be ok
12848 12875 // since any compare to a zero should be eq/neq.
12849 12876 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12850 12877 match(Set cr (CmpP (LoadP op) zero));
12851 12878
12852 12879 format %{ "TEST $op,0xFFFFFFFF" %}
12853 12880 ins_cost(500);
12854 12881 opcode(0xF7); /* Opcode F7 /0 */
12855 12882 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12856 12883 ins_pipe( ialu_cr_reg_imm );
12857 12884 %}
12858 12885
12859 12886 // Yanked all unsigned pointer compare operations.
12860 12887 // Pointer compares are done with CmpP which is already unsigned.
12861 12888
12862 12889 //----------Max and Min--------------------------------------------------------
12863 12890 // Min Instructions
12864 12891 ////
12865 12892 // *** Min and Max using the conditional move are slower than the
12866 12893 // *** branch version on a Pentium III.
12867 12894 // // Conditional move for min
12868 12895 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12869 12896 // effect( USE_DEF op2, USE op1, USE cr );
12870 12897 // format %{ "CMOVlt $op2,$op1\t! min" %}
12871 12898 // opcode(0x4C,0x0F);
12872 12899 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12873 12900 // ins_pipe( pipe_cmov_reg );
12874 12901 //%}
12875 12902 //
12876 12903 //// Min Register with Register (P6 version)
12877 12904 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12878 12905 // predicate(VM_Version::supports_cmov() );
12879 12906 // match(Set op2 (MinI op1 op2));
12880 12907 // ins_cost(200);
12881 12908 // expand %{
12882 12909 // eFlagsReg cr;
12883 12910 // compI_eReg(cr,op1,op2);
12884 12911 // cmovI_reg_lt(op2,op1,cr);
12885 12912 // %}
12886 12913 //%}
12887 12914
12888 12915 // Min Register with Register (generic version)
12889 12916 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12890 12917 match(Set dst (MinI dst src));
12891 12918 effect(KILL flags);
12892 12919 ins_cost(300);
12893 12920
12894 12921 format %{ "MIN $dst,$src" %}
12895 12922 opcode(0xCC);
12896 12923 ins_encode( min_enc(dst,src) );
12897 12924 ins_pipe( pipe_slow );
12898 12925 %}
12899 12926
12900 12927 // Max Register with Register
12901 12928 // *** Min and Max using the conditional move are slower than the
12902 12929 // *** branch version on a Pentium III.
12903 12930 // // Conditional move for max
12904 12931 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12905 12932 // effect( USE_DEF op2, USE op1, USE cr );
12906 12933 // format %{ "CMOVgt $op2,$op1\t! max" %}
12907 12934 // opcode(0x4F,0x0F);
12908 12935 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12909 12936 // ins_pipe( pipe_cmov_reg );
12910 12937 //%}
12911 12938 //
12912 12939 // // Max Register with Register (P6 version)
12913 12940 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12914 12941 // predicate(VM_Version::supports_cmov() );
12915 12942 // match(Set op2 (MaxI op1 op2));
12916 12943 // ins_cost(200);
12917 12944 // expand %{
12918 12945 // eFlagsReg cr;
12919 12946 // compI_eReg(cr,op1,op2);
12920 12947 // cmovI_reg_gt(op2,op1,cr);
12921 12948 // %}
12922 12949 //%}
12923 12950
12924 12951 // Max Register with Register (generic version)
12925 12952 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12926 12953 match(Set dst (MaxI dst src));
12927 12954 effect(KILL flags);
12928 12955 ins_cost(300);
12929 12956
12930 12957 format %{ "MAX $dst,$src" %}
12931 12958 opcode(0xCC);
↓ open down ↓ |
1337 lines elided |
↑ open up ↑ |
12932 12959 ins_encode( max_enc(dst,src) );
12933 12960 ins_pipe( pipe_slow );
12934 12961 %}
12935 12962
12936 12963 // ============================================================================
12937 12964 // Branch Instructions
12938 12965 // Jump Table
12939 12966 instruct jumpXtnd(eRegI switch_val) %{
12940 12967 match(Jump switch_val);
12941 12968 ins_cost(350);
12942 -
12943 - format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12944 -
12969 + format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12945 12970 ins_encode %{
12946 - address table_base = __ address_table_constant(_index2label);
12947 -
12948 12971 // Jump to Address(table_base + switch_reg)
12949 - InternalAddress table(table_base);
12950 12972 Address index(noreg, $switch_val$$Register, Address::times_1);
12951 - __ jump(ArrayAddress(table, index));
12973 + __ jump(ArrayAddress($constantaddress, index));
12952 12974 %}
12953 12975 ins_pc_relative(1);
12954 12976 ins_pipe(pipe_jmp);
12955 12977 %}
12956 12978
12957 12979 // Jump Direct - Label defines a relative address from JMP+1
12958 12980 instruct jmpDir(label labl) %{
12959 12981 match(Goto);
12960 12982 effect(USE labl);
12961 12983
12962 12984 ins_cost(300);
12963 12985 format %{ "JMP $labl" %}
12964 12986 size(5);
12965 12987 opcode(0xE9);
12966 12988 ins_encode( OpcP, Lbl( labl ) );
12967 12989 ins_pipe( pipe_jmp );
12968 12990 ins_pc_relative(1);
12969 12991 %}
12970 12992
12971 12993 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12972 12994 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12973 12995 match(If cop cr);
12974 12996 effect(USE labl);
12975 12997
12976 12998 ins_cost(300);
12977 12999 format %{ "J$cop $labl" %}
12978 13000 size(6);
12979 13001 opcode(0x0F, 0x80);
12980 13002 ins_encode( Jcc( cop, labl) );
12981 13003 ins_pipe( pipe_jcc );
12982 13004 ins_pc_relative(1);
12983 13005 %}
12984 13006
12985 13007 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12986 13008 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12987 13009 match(CountedLoopEnd cop cr);
12988 13010 effect(USE labl);
12989 13011
12990 13012 ins_cost(300);
12991 13013 format %{ "J$cop $labl\t# Loop end" %}
12992 13014 size(6);
12993 13015 opcode(0x0F, 0x80);
12994 13016 ins_encode( Jcc( cop, labl) );
12995 13017 ins_pipe( pipe_jcc );
12996 13018 ins_pc_relative(1);
12997 13019 %}
12998 13020
12999 13021 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13000 13022 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13001 13023 match(CountedLoopEnd cop cmp);
13002 13024 effect(USE labl);
13003 13025
13004 13026 ins_cost(300);
13005 13027 format %{ "J$cop,u $labl\t# Loop end" %}
13006 13028 size(6);
13007 13029 opcode(0x0F, 0x80);
13008 13030 ins_encode( Jcc( cop, labl) );
13009 13031 ins_pipe( pipe_jcc );
13010 13032 ins_pc_relative(1);
13011 13033 %}
13012 13034
13013 13035 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13014 13036 match(CountedLoopEnd cop cmp);
13015 13037 effect(USE labl);
13016 13038
13017 13039 ins_cost(200);
13018 13040 format %{ "J$cop,u $labl\t# Loop end" %}
13019 13041 size(6);
13020 13042 opcode(0x0F, 0x80);
13021 13043 ins_encode( Jcc( cop, labl) );
13022 13044 ins_pipe( pipe_jcc );
13023 13045 ins_pc_relative(1);
13024 13046 %}
13025 13047
13026 13048 // Jump Direct Conditional - using unsigned comparison
13027 13049 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13028 13050 match(If cop cmp);
13029 13051 effect(USE labl);
13030 13052
13031 13053 ins_cost(300);
13032 13054 format %{ "J$cop,u $labl" %}
13033 13055 size(6);
13034 13056 opcode(0x0F, 0x80);
13035 13057 ins_encode(Jcc(cop, labl));
13036 13058 ins_pipe(pipe_jcc);
13037 13059 ins_pc_relative(1);
13038 13060 %}
13039 13061
13040 13062 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13041 13063 match(If cop cmp);
13042 13064 effect(USE labl);
13043 13065
13044 13066 ins_cost(200);
13045 13067 format %{ "J$cop,u $labl" %}
13046 13068 size(6);
13047 13069 opcode(0x0F, 0x80);
13048 13070 ins_encode(Jcc(cop, labl));
13049 13071 ins_pipe(pipe_jcc);
13050 13072 ins_pc_relative(1);
13051 13073 %}
13052 13074
13053 13075 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13054 13076 match(If cop cmp);
13055 13077 effect(USE labl);
13056 13078
13057 13079 ins_cost(200);
13058 13080 format %{ $$template
13059 13081 if ($cop$$cmpcode == Assembler::notEqual) {
13060 13082 $$emit$$"JP,u $labl\n\t"
13061 13083 $$emit$$"J$cop,u $labl"
13062 13084 } else {
13063 13085 $$emit$$"JP,u done\n\t"
13064 13086 $$emit$$"J$cop,u $labl\n\t"
13065 13087 $$emit$$"done:"
13066 13088 }
13067 13089 %}
13068 13090 size(12);
13069 13091 opcode(0x0F, 0x80);
13070 13092 ins_encode %{
13071 13093 Label* l = $labl$$label;
13072 13094 $$$emit8$primary;
13073 13095 emit_cc(cbuf, $secondary, Assembler::parity);
13074 13096 int parity_disp = -1;
13075 13097 bool ok = false;
13076 13098 if ($cop$$cmpcode == Assembler::notEqual) {
13077 13099 // the two jumps are emitted 6 bytes apart, so their displacements to the common label differ by 6
13078 13100 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13079 13101 } else if ($cop$$cmpcode == Assembler::equal) {
13080 13102 parity_disp = 6;
13081 13103 ok = true;
13082 13104 } else {
13083 13105 ShouldNotReachHere();
13084 13106 }
13085 13107 emit_d32(cbuf, parity_disp);
13086 13108 $$$emit8$primary;
13087 13109 emit_cc(cbuf, $secondary, $cop$$cmpcode);
13088 13110 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13089 13111 emit_d32(cbuf, disp);
13090 13112 %}
13091 13113 ins_pipe(pipe_jcc);
13092 13114 ins_pc_relative(1);
13093 13115 %}
13094 13116
13095 13117 // ============================================================================
13096 13118 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
13097 13119 // array for an instance of the superklass. Set a hidden internal cache on a
13098 13120 // hit (cache is checked with exposed code in gen_subtype_check()). Return
13099 13121 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
13100 13122 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13101 13123 match(Set result (PartialSubtypeCheck sub super));
13102 13124 effect( KILL rcx, KILL cr );
13103 13125
13104 13126 ins_cost(1100); // slightly larger than the next version
13105 13127 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13106 13128 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13107 13129 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13108 13130 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13109 13131 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
13110 13132 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13111 13133 "XOR $result,$result\t\t Hit: EDI zero\n\t"
13112 13134 "miss:\t" %}
13113 13135
13114 13136 opcode(0x1); // Force a XOR of EDI
13115 13137 ins_encode( enc_PartialSubtypeCheck() );
13116 13138 ins_pipe( pipe_slow );
13117 13139 %}
13118 13140
13119 13141 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13120 13142 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13121 13143 effect( KILL rcx, KILL result );
13122 13144
13123 13145 ins_cost(1000);
13124 13146 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13125 13147 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13126 13148 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13127 13149 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13128 13150 "JNE,s miss\t\t# Missed: flags NZ\n\t"
13129 13151 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13130 13152 "miss:\t" %}
13131 13153
13132 13154 opcode(0x0); // No need to XOR EDI
13133 13155 ins_encode( enc_PartialSubtypeCheck() );
13134 13156 ins_pipe( pipe_slow );
13135 13157 %}
13136 13158
13137 13159 // ============================================================================
13138 13160 // Branch Instructions -- short offset versions
13139 13161 //
13140 13162 // These instructions are used to replace jumps of a long offset (the default
13141 13163 // match) with jumps of a shorter offset. These instructions are all tagged
13142 13164 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13143 13165 // match rules in general matching. Instead, the ADLC generates a conversion
13144 13166 // method in the MachNode which can be used to do in-place replacement of the
13145 13167 // long variant with the shorter variant. The compiler will determine if a
13146 13168 // branch can be taken by the is_short_branch_offset() predicate in the machine
13147 13169 // specific code section of the file.
13148 13170
13149 13171 // Jump Direct - Label defines a relative address from JMP+1
13150 13172 instruct jmpDir_short(label labl) %{
13151 13173 match(Goto);
13152 13174 effect(USE labl);
13153 13175
13154 13176 ins_cost(300);
13155 13177 format %{ "JMP,s $labl" %}
13156 13178 size(2);
13157 13179 opcode(0xEB);
13158 13180 ins_encode( OpcP, LblShort( labl ) );
13159 13181 ins_pipe( pipe_jmp );
13160 13182 ins_pc_relative(1);
13161 13183 ins_short_branch(1);
13162 13184 %}
13163 13185
13164 13186 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13165 13187 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13166 13188 match(If cop cr);
13167 13189 effect(USE labl);
13168 13190
13169 13191 ins_cost(300);
13170 13192 format %{ "J$cop,s $labl" %}
13171 13193 size(2);
13172 13194 opcode(0x70);
13173 13195 ins_encode( JccShort( cop, labl) );
13174 13196 ins_pipe( pipe_jcc );
13175 13197 ins_pc_relative(1);
13176 13198 ins_short_branch(1);
13177 13199 %}
13178 13200
13179 13201 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13180 13202 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13181 13203 match(CountedLoopEnd cop cr);
13182 13204 effect(USE labl);
13183 13205
13184 13206 ins_cost(300);
13185 13207 format %{ "J$cop,s $labl\t# Loop end" %}
13186 13208 size(2);
13187 13209 opcode(0x70);
13188 13210 ins_encode( JccShort( cop, labl) );
13189 13211 ins_pipe( pipe_jcc );
13190 13212 ins_pc_relative(1);
13191 13213 ins_short_branch(1);
13192 13214 %}
13193 13215
13194 13216 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13195 13217 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13196 13218 match(CountedLoopEnd cop cmp);
13197 13219 effect(USE labl);
13198 13220
13199 13221 ins_cost(300);
13200 13222 format %{ "J$cop,us $labl\t# Loop end" %}
13201 13223 size(2);
13202 13224 opcode(0x70);
13203 13225 ins_encode( JccShort( cop, labl) );
13204 13226 ins_pipe( pipe_jcc );
13205 13227 ins_pc_relative(1);
13206 13228 ins_short_branch(1);
13207 13229 %}
13208 13230
13209 13231 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13210 13232 match(CountedLoopEnd cop cmp);
13211 13233 effect(USE labl);
13212 13234
13213 13235 ins_cost(300);
13214 13236 format %{ "J$cop,us $labl\t# Loop end" %}
13215 13237 size(2);
13216 13238 opcode(0x70);
13217 13239 ins_encode( JccShort( cop, labl) );
13218 13240 ins_pipe( pipe_jcc );
13219 13241 ins_pc_relative(1);
13220 13242 ins_short_branch(1);
13221 13243 %}
13222 13244
13223 13245 // Jump Direct Conditional - using unsigned comparison
13224 13246 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13225 13247 match(If cop cmp);
13226 13248 effect(USE labl);
13227 13249
13228 13250 ins_cost(300);
13229 13251 format %{ "J$cop,us $labl" %}
13230 13252 size(2);
13231 13253 opcode(0x70);
13232 13254 ins_encode( JccShort( cop, labl) );
13233 13255 ins_pipe( pipe_jcc );
13234 13256 ins_pc_relative(1);
13235 13257 ins_short_branch(1);
13236 13258 %}
13237 13259
13238 13260 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13239 13261 match(If cop cmp);
13240 13262 effect(USE labl);
13241 13263
13242 13264 ins_cost(300);
13243 13265 format %{ "J$cop,us $labl" %}
13244 13266 size(2);
13245 13267 opcode(0x70);
13246 13268 ins_encode( JccShort( cop, labl) );
13247 13269 ins_pipe( pipe_jcc );
13248 13270 ins_pc_relative(1);
13249 13271 ins_short_branch(1);
13250 13272 %}
13251 13273
13252 13274 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13253 13275 match(If cop cmp);
13254 13276 effect(USE labl);
13255 13277
13256 13278 ins_cost(300);
13257 13279 format %{ $$template
13258 13280 if ($cop$$cmpcode == Assembler::notEqual) {
13259 13281 $$emit$$"JP,u,s $labl\n\t"
13260 13282 $$emit$$"J$cop,u,s $labl"
13261 13283 } else {
13262 13284 $$emit$$"JP,u,s done\n\t"
13263 13285 $$emit$$"J$cop,u,s $labl\n\t"
13264 13286 $$emit$$"done:"
13265 13287 }
13266 13288 %}
13267 13289 size(4);
13268 13290 opcode(0x70);
13269 13291 ins_encode %{
13270 13292 Label* l = $labl$$label;
13271 13293 emit_cc(cbuf, $primary, Assembler::parity);
13272 13294 int parity_disp = -1;
13273 13295 if ($cop$$cmpcode == Assembler::notEqual) {
13274 13296 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13275 13297 } else if ($cop$$cmpcode == Assembler::equal) {
13276 13298 parity_disp = 2;
13277 13299 } else {
13278 13300 ShouldNotReachHere();
13279 13301 }
13280 13302 emit_d8(cbuf, parity_disp);
13281 13303 emit_cc(cbuf, $primary, $cop$$cmpcode);
13282 13304 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13283 13305 emit_d8(cbuf, disp);
13284 13306 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
13285 13307 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
13286 13308 %}
13287 13309 ins_pipe(pipe_jcc);
13288 13310 ins_pc_relative(1);
13289 13311 ins_short_branch(1);
13290 13312 %}
13291 13313
13292 13314 // ============================================================================
13293 13315 // Long Compare
13294 13316 //
13295 13317 // Currently we hold longs in 2 registers. Comparing such values efficiently
13296 13318 // is tricky. The flavor of compare used depends on whether we are testing
13297 13319 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
13298 13320 // The GE test is the negated LT test. The LE test can be had by commuting
13299 13321 // the operands (yielding a GE test) and then negating; negate again for the
13300 13322 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
13301 13323 // NE test is negated from that.
13302 13324
13303 13325 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13304 13326 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
13305 13327 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
13306 13328 // are collapsed internally in the ADLC's dfa-gen code. The match for
13307 13329 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13308 13330 // foo match ends up with the wrong leaf. One fix is to not match both
13309 13331 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
13310 13332 // both forms beat the trinary form of long-compare and both are very useful
13311 13333 // on Intel which has so few registers.
13312 13334
13313 13335 // Manifest a CmpL result in an integer register. Very painful.
13314 13336 // This is the test to avoid.
13315 13337 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13316 13338 match(Set dst (CmpL3 src1 src2));
13317 13339 effect( KILL flags );
13318 13340 ins_cost(1000);
13319 13341 format %{ "XOR $dst,$dst\n\t"
13320 13342 "CMP $src1.hi,$src2.hi\n\t"
13321 13343 "JLT,s m_one\n\t"
13322 13344 "JGT,s p_one\n\t"
13323 13345 "CMP $src1.lo,$src2.lo\n\t"
13324 13346 "JB,s m_one\n\t"
13325 13347 "JEQ,s done\n"
13326 13348 "p_one:\tINC $dst\n\t"
13327 13349 "JMP,s done\n"
13328 13350 "m_one:\tDEC $dst\n"
13329 13351 "done:" %}
13330 13352 ins_encode %{
13331 13353 Label p_one, m_one, done;
13332 13354 __ xorptr($dst$$Register, $dst$$Register);
13333 13355 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13334 13356 __ jccb(Assembler::less, m_one);
13335 13357 __ jccb(Assembler::greater, p_one);
13336 13358 __ cmpl($src1$$Register, $src2$$Register);
13337 13359 __ jccb(Assembler::below, m_one);
13338 13360 __ jccb(Assembler::equal, done);
13339 13361 __ bind(p_one);
13340 13362 __ incrementl($dst$$Register);
13341 13363 __ jmpb(done);
13342 13364 __ bind(m_one);
13343 13365 __ decrementl($dst$$Register);
13344 13366 __ bind(done);
13345 13367 %}
13346 13368 ins_pipe( pipe_slow );
13347 13369 %}
13348 13370
13349 13371 //======
13350 13372 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13351 13373 // compares. Can be used for LE or GT compares by reversing arguments.
13352 13374 // NOT GOOD FOR EQ/NE tests.
13353 13375 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13354 13376 match( Set flags (CmpL src zero ));
13355 13377 ins_cost(100);
13356 13378 format %{ "TEST $src.hi,$src.hi" %}
13357 13379 opcode(0x85);
13358 13380 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13359 13381 ins_pipe( ialu_cr_reg_reg );
13360 13382 %}
13361 13383
13362 13384 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13363 13385 // compares. Can be used for LE or GT compares by reversing arguments.
13364 13386 // NOT GOOD FOR EQ/NE tests.
13365 13387 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13366 13388 match( Set flags (CmpL src1 src2 ));
13367 13389 effect( TEMP tmp );
13368 13390 ins_cost(300);
13369 13391 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13370 13392 "MOV $tmp,$src1.hi\n\t"
13371 13393 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13372 13394 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13373 13395 ins_pipe( ialu_cr_reg_reg );
13374 13396 %}
13375 13397
13376 13398 // Long compares reg < zero/req OR reg >= zero/req.
13377 13399 // Just a wrapper for a normal branch, plus the predicate test.
13378 13400 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13379 13401 match(If cmp flags);
13380 13402 effect(USE labl);
13381 13403 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13382 13404 expand %{
13383 13405 jmpCon(cmp,flags,labl); // JLT or JGE...
13384 13406 %}
13385 13407 %}
13386 13408
13387 13409 // Compare 2 longs and CMOVE longs.
13388 13410 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13389 13411 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13390 13412 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13391 13413 ins_cost(400);
13392 13414 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13393 13415 "CMOV$cmp $dst.hi,$src.hi" %}
13394 13416 opcode(0x0F,0x40);
13395 13417 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13396 13418 ins_pipe( pipe_cmov_reg_long );
13397 13419 %}
13398 13420
13399 13421 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13400 13422 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13401 13423 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13402 13424 ins_cost(500);
13403 13425 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13404 13426 "CMOV$cmp $dst.hi,$src.hi" %}
13405 13427 opcode(0x0F,0x40);
13406 13428 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13407 13429 ins_pipe( pipe_cmov_reg_long );
13408 13430 %}
13409 13431
13410 13432 // Compare 2 longs and CMOVE ints.
13411 13433 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13412 13434 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13413 13435 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13414 13436 ins_cost(200);
13415 13437 format %{ "CMOV$cmp $dst,$src" %}
13416 13438 opcode(0x0F,0x40);
13417 13439 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13418 13440 ins_pipe( pipe_cmov_reg );
13419 13441 %}
13420 13442
13421 13443 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13422 13444 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13423 13445 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13424 13446 ins_cost(250);
13425 13447 format %{ "CMOV$cmp $dst,$src" %}
13426 13448 opcode(0x0F,0x40);
13427 13449 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13428 13450 ins_pipe( pipe_cmov_mem );
13429 13451 %}
13430 13452
13431 13453 // Compare 2 longs and CMOVE ints.
13432 13454 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13433 13455 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13434 13456 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13435 13457 ins_cost(200);
13436 13458 format %{ "CMOV$cmp $dst,$src" %}
13437 13459 opcode(0x0F,0x40);
13438 13460 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13439 13461 ins_pipe( pipe_cmov_reg );
13440 13462 %}
13441 13463
13442 13464 // Compare 2 longs and CMOVE doubles
13443 13465 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13444 13466 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13445 13467 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13446 13468 ins_cost(200);
13447 13469 expand %{
13448 13470 fcmovD_regS(cmp,flags,dst,src);
13449 13471 %}
13450 13472 %}
13451 13473
13452 13474 // Compare 2 longs and CMOVE doubles
13453 13475 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
13454 13476 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13455 13477 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13456 13478 ins_cost(200);
13457 13479 expand %{
13458 13480 fcmovXD_regS(cmp,flags,dst,src);
13459 13481 %}
13460 13482 %}
13461 13483
13462 13484 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13463 13485 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13464 13486 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13465 13487 ins_cost(200);
13466 13488 expand %{
13467 13489 fcmovF_regS(cmp,flags,dst,src);
13468 13490 %}
13469 13491 %}
13470 13492
13471 13493 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
13472 13494 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13473 13495 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13474 13496 ins_cost(200);
13475 13497 expand %{
13476 13498 fcmovX_regS(cmp,flags,dst,src);
13477 13499 %}
13478 13500 %}
13479 13501
13480 13502 //======
13481 13503 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13482 13504 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13483 13505 match( Set flags (CmpL src zero ));
13484 13506 effect(TEMP tmp);
13485 13507 ins_cost(200);
13486 13508 format %{ "MOV $tmp,$src.lo\n\t"
13487 13509 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13488 13510 ins_encode( long_cmp_flags0( src, tmp ) );
13489 13511 ins_pipe( ialu_reg_reg_long );
13490 13512 %}
13491 13513
13492 13514 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13493 13515 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13494 13516 match( Set flags (CmpL src1 src2 ));
13495 13517 ins_cost(200+300);
13496 13518 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13497 13519 "JNE,s skip\n\t"
13498 13520 "CMP $src1.hi,$src2.hi\n\t"
13499 13521 "skip:\t" %}
13500 13522 ins_encode( long_cmp_flags1( src1, src2 ) );
13501 13523 ins_pipe( ialu_cr_reg_reg );
13502 13524 %}
13503 13525
13504 13526 // Long compare reg == zero/reg OR reg != zero/reg
13505 13527 // Just a wrapper for a normal branch, plus the predicate test.
13506 13528 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13507 13529 match(If cmp flags);
13508 13530 effect(USE labl);
13509 13531 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13510 13532 expand %{
13511 13533 jmpCon(cmp,flags,labl); // JEQ or JNE...
13512 13534 %}
13513 13535 %}
13514 13536
13515 13537 // Compare 2 longs and CMOVE longs.
13516 13538 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13517 13539 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13518 13540 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13519 13541 ins_cost(400);
13520 13542 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13521 13543 "CMOV$cmp $dst.hi,$src.hi" %}
13522 13544 opcode(0x0F,0x40);
13523 13545 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13524 13546 ins_pipe( pipe_cmov_reg_long );
13525 13547 %}
13526 13548
13527 13549 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13528 13550 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13529 13551 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13530 13552 ins_cost(500);
13531 13553 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13532 13554 "CMOV$cmp $dst.hi,$src.hi" %}
13533 13555 opcode(0x0F,0x40);
13534 13556 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13535 13557 ins_pipe( pipe_cmov_reg_long );
13536 13558 %}
13537 13559
13538 13560 // Compare 2 longs and CMOVE ints.
13539 13561 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13540 13562 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13541 13563 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13542 13564 ins_cost(200);
13543 13565 format %{ "CMOV$cmp $dst,$src" %}
13544 13566 opcode(0x0F,0x40);
13545 13567 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13546 13568 ins_pipe( pipe_cmov_reg );
13547 13569 %}
13548 13570
13549 13571 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13550 13572 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13551 13573 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13552 13574 ins_cost(250);
13553 13575 format %{ "CMOV$cmp $dst,$src" %}
13554 13576 opcode(0x0F,0x40);
13555 13577 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13556 13578 ins_pipe( pipe_cmov_mem );
13557 13579 %}
13558 13580
13559 13581 // Compare 2 longs and CMOVE ints.
13560 13582 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13561 13583 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13562 13584 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13563 13585 ins_cost(200);
13564 13586 format %{ "CMOV$cmp $dst,$src" %}
13565 13587 opcode(0x0F,0x40);
13566 13588 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13567 13589 ins_pipe( pipe_cmov_reg );
13568 13590 %}
13569 13591
13570 13592 // Compare 2 longs and CMOVE doubles
13571 13593 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13572 13594 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13573 13595 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13574 13596 ins_cost(200);
13575 13597 expand %{
13576 13598 fcmovD_regS(cmp,flags,dst,src);
13577 13599 %}
13578 13600 %}
13579 13601
13580 13602 // Compare 2 longs and CMOVE doubles
13581 13603 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
13582 13604 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13583 13605 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13584 13606 ins_cost(200);
13585 13607 expand %{
13586 13608 fcmovXD_regS(cmp,flags,dst,src);
13587 13609 %}
13588 13610 %}
13589 13611
// Compare 2 longs and CMOVE floats (FPU-stack floats, UseSSE==0).
13590 13612 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
// FIX: parenthesize the eq/ne disjunction. Without parens the predicate parsed as
// (UseSSE==0 && eq) || ne, so the 'ne' arm ignored the UseSSE guard.
13591 13613 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13592 13614 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13593 13615 ins_cost(200);
13594 13616 expand %{
13595 13617 fcmovF_regS(cmp,flags,dst,src);
13596 13618 %}
13597 13619 %}
13598 13620
// Compare 2 longs and CMOVE floats (XMM floats, UseSSE>=1).
13599 13621 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// FIX: parenthesize the eq/ne disjunction so the UseSSE>=1 guard applies to BOTH
// arms ('&&' binds tighter than '||'); matches the integer cmov*_EQNE rules.
13600 13622 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13601 13623 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13602 13624 ins_cost(200);
13603 13625 expand %{
13604 13626 fcmovX_regS(cmp,flags,dst,src);
13605 13627 %}
13606 13628 %}
13607 13629
13608 13630 //======
13609 13631 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13610 13632 // Same as cmpL_reg_flags_LEGT except must negate src
13611 13633 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13612 13634 match( Set flags (CmpL src zero ));
// tmp is zeroed and then destroyed by the CMP/SBB sequence.
13613 13635 effect( TEMP tmp );
13614 13636 ins_cost(300);
// Computes 0 - src (XOR zeroes tmp, CMP subtracts the low word, SBB the high word
// with borrow) so the flags reflect the commuted test, as the format note says.
13615 13637 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13616 13638 "CMP $tmp,$src.lo\n\t"
13617 13639 "SBB $tmp,$src.hi\n\t" %}
13618 13640 ins_encode( long_cmp_flags3(src, tmp) );
13619 13641 ins_pipe( ialu_reg_reg_long );
13620 13642 %}
13621 13643
13622 13644 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13623 13645 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13624 13646 // requires a commuted test to get the same result.
13625 13647 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13626 13648 match( Set flags (CmpL src1 src2 ));
// tmp holds src2.hi during the SBB and is clobbered.
13627 13649 effect( TEMP tmp );
13628 13650 ins_cost(300);
// CMP low words, then SBB high words (with borrow) of the SWAPPED operands;
// consumers must therefore use the commuted condition (LE/GT).
13629 13651 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13630 13652 "MOV $tmp,$src2.hi\n\t"
13631 13653 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13632 13654 ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13633 13655 ins_pipe( ialu_cr_reg_reg );
13634 13656 %}
13635 13657
13636 13658 // Long compares reg < zero/req OR reg >= zero/req.
13637 13659 // Just a wrapper for a normal branch, plus the predicate test
13638 13660 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13639 13661 match(If cmp flags);
13640 13662 effect(USE labl);
// Only handle GT/LE tests here; the flags were produced by the commuted
// cmpL_*_flags_LEGT rules above.
13641 13663 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13642 13664 ins_cost(300);
13643 13665 expand %{
13644 13666 jmpCon(cmp,flags,labl); // JGT or JLE...
13645 13667 %}
13646 13668 %}
13647 13669
13648 13670 // Compare 2 longs and CMOVE longs.
13649 13671 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13650 13672 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13651 13673 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13652 13674 ins_cost(400);
// 64-bit conditional move done as two 32-bit CMOVs (lo then hi) on one condition.
13653 13675 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13654 13676 "CMOV$cmp $dst.hi,$src.hi" %}
13655 13677 opcode(0x0F,0x40);
13656 13678 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13657 13679 ins_pipe( pipe_cmov_reg_long );
13658 13680 %}
13659 13681
// Same as cmovLL_reg_LEGT but the source long comes from memory (two CMOV loads,
// the high half at offset +4).
13660 13682 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13661 13683 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13662 13684 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13663 13685 ins_cost(500);
13664 13686 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13665 13687 "CMOV$cmp $dst.hi,$src.hi+4" %}
13666 13688 opcode(0x0F,0x40);
13667 13689 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13668 13690 ins_pipe( pipe_cmov_reg_long );
13669 13691 %}
13670 13692
13671 13693 // Compare 2 longs and CMOVE ints.
13672 13694 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
// LE/GT-only variant: flags come from the commuted long-compare rules above.
13673 13695 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13674 13696 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13675 13697 ins_cost(200);
13676 13698 format %{ "CMOV$cmp $dst,$src" %}
13677 13699 opcode(0x0F,0x40);
13678 13700 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13679 13701 ins_pipe( pipe_cmov_reg );
13680 13702 %}
13681 13703
// Memory-source variant of cmovII_reg_LEGT (CMOV can load directly from memory).
13682 13704 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13683 13705 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13684 13706 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13685 13707 ins_cost(250);
13686 13708 format %{ "CMOV$cmp $dst,$src" %}
13687 13709 opcode(0x0F,0x40);
13688 13710 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13689 13711 ins_pipe( pipe_cmov_mem );
13690 13712 %}
13691 13713
13692 13714 // Compare 2 longs and CMOVE ptrs.
13693 13715 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
// Pointer version of cmovII_reg_LEGT; same LE/GT-only predicate.
13694 13716 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13695 13717 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13696 13718 ins_cost(200);
13697 13719 format %{ "CMOV$cmp $dst,$src" %}
13698 13720 opcode(0x0F,0x40);
13699 13721 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13700 13722 ins_pipe( pipe_cmov_reg );
13701 13723 %}
13702 13724
13703 13725 // Compare 2 longs and CMOVE doubles (FPU-stack doubles, UseSSE<=1).
13704 13726 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// FIX: parenthesize the le/gt disjunction. '&&' binds tighter than '||', so the
// original predicate parsed as (UseSSE<=1 && le) || gt, letting the 'gt' arm match
// even when UseSSE>1. Now consistent with the integer cmov*_LEGT rules above.
13705 13727 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13706 13728 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13707 13729 ins_cost(200);
13708 13730 expand %{
13709 13731 fcmovD_regS(cmp,flags,dst,src);
13710 13732 %}
13711 13733 %}
13712 13734
13713 13735 // Compare 2 longs and CMOVE doubles (XMM doubles, UseSSE>=2).
13714 13736 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE>=2 guard applies to BOTH
// arms (previously (UseSSE>=2 && le) || gt due to operator precedence).
13715 13737 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13716 13738 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13717 13739 ins_cost(200);
13718 13740 expand %{
13719 13741 fcmovXD_regS(cmp,flags,dst,src);
13720 13742 %}
13721 13743 %}
13722 13744
// Compare 2 longs and CMOVE floats (FPU-stack floats, UseSSE==0).
13723 13745 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE==0 guard applies to BOTH
// arms (previously (UseSSE==0 && le) || gt due to operator precedence).
13724 13746 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13725 13747 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13726 13748 ins_cost(200);
13727 13749 expand %{
13728 13750 fcmovF_regS(cmp,flags,dst,src);
13729 13751 %}
13730 13752 %}
13731 13753
13732 13754
// Compare 2 longs and CMOVE floats (XMM floats, UseSSE>=1).
13733 13755 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// FIX: parenthesize the le/gt disjunction so the UseSSE>=1 guard applies to BOTH
// arms (previously (UseSSE>=1 && le) || gt due to operator precedence).
13734 13756 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13735 13757 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13736 13758 ins_cost(200);
13737 13759 expand %{
13738 13760 fcmovX_regS(cmp,flags,dst,src);
13739 13761 %}
13740 13762 %}
13741 13763
13742 13764
13743 13765 // ============================================================================
13744 13766 // Procedure Call/Return Instructions
13745 13767 // Call Java Static Instruction
13746 13768 // Note: If this code changes, the corresponding ret_addr_offset() and
13747 13769 // compute_padding() functions will have to be adjusted.
13748 13770 instruct CallStaticJavaDirect(method meth) %{
13749 13771 match(CallStaticJava);
// Only non-MethodHandle static calls; CallStaticJavaHandle covers the MH case.
13750 13772 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13751 13773 effect(USE meth);
13752 13774
13753 13775 ins_cost(300);
13754 13776 format %{ "CALL,static " %}
// 0xE8 = x86 near CALL with 32-bit relative displacement.
13755 13777 opcode(0xE8); /* E8 cd */
13756 13778 ins_encode( pre_call_FPU,
13757 13779 Java_Static_Call( meth ),
13758 13780 call_epilog,
13759 13781 post_call_FPU );
13760 13782 ins_pipe( pipe_slow );
13761 13783 ins_pc_relative(1);
13762 13784 ins_alignment(4);
13763 13785 %}
13764 13786
13765 13787 // Call Java Static Instruction (method handle version)
13766 13788 // Note: If this code changes, the corresponding ret_addr_offset() and
13767 13789 // compute_padding() functions will have to be adjusted.
13768 13790 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13769 13791 match(CallStaticJava);
// Complement of CallStaticJavaDirect's predicate: MethodHandle invokes only.
13770 13792 predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13771 13793 effect(USE meth);
13772 13794 // EBP is saved by all callees (for interpreter stack correction).
13773 13795 // We use it here for a similar purpose, in {preserve,restore}_SP.
13774 13796
13775 13797 ins_cost(300);
13776 13798 format %{ "CALL,static/MethodHandle " %}
13777 13799 opcode(0xE8); /* E8 cd */
// Same as the plain static call but brackets it with SP save/restore because
// a MethodHandle call may rewrite the stack pointer.
13778 13800 ins_encode( pre_call_FPU,
13779 13801 preserve_SP,
13780 13802 Java_Static_Call( meth ),
13781 13803 restore_SP,
13782 13804 call_epilog,
13783 13805 post_call_FPU );
13784 13806 ins_pipe( pipe_slow );
13785 13807 ins_pc_relative(1);
13786 13808 ins_alignment(4);
13787 13809 %}
13788 13810
13789 13811 // Call Java Dynamic Instruction
13790 13812 // Note: If this code changes, the corresponding ret_addr_offset() and
13791 13813 // compute_padding() functions will have to be adjusted.
13792 13814 instruct CallDynamicJavaDirect(method meth) %{
13793 13815 match(CallDynamicJava);
13794 13816 effect(USE meth);
13795 13817
13796 13818 ins_cost(300);
// The format shows EAX pre-loaded with a sentinel oop (-1); per the format string
// this is part of the inline-cache calling convention emitted by Java_Dynamic_Call.
13797 13819 format %{ "MOV EAX,(oop)-1\n\t"
13798 13820 "CALL,dynamic" %}
13799 13821 opcode(0xE8); /* E8 cd */
13800 13822 ins_encode( pre_call_FPU,
13801 13823 Java_Dynamic_Call( meth ),
13802 13824 call_epilog,
13803 13825 post_call_FPU );
13804 13826 ins_pipe( pipe_slow );
13805 13827 ins_pc_relative(1);
13806 13828 ins_alignment(4);
13807 13829 %}
13808 13830
13809 13831 // Call Runtime Instruction
13810 13832 instruct CallRuntimeDirect(method meth) %{
13811 13833 match(CallRuntime );
13812 13834 effect(USE meth);
13813 13835
13814 13836 ins_cost(300);
13815 13837 format %{ "CALL,runtime " %}
13816 13838 opcode(0xE8); /* E8 cd */
13817 13839 // Use FFREEs to clear entries in float stack
// Unlike CallLeafDirect below, no FPU verification after return (safepoint call).
13818 13840 ins_encode( pre_call_FPU,
13819 13841 FFree_Float_Stack_All,
13820 13842 Java_To_Runtime( meth ),
13821 13843 post_call_FPU );
13822 13844 ins_pipe( pipe_slow );
13823 13845 ins_pc_relative(1);
13824 13846 %}
13825 13847
13826 13848 // Call runtime without safepoint
13827 13849 instruct CallLeafDirect(method meth) %{
13828 13850 match(CallLeaf);
13829 13851 effect(USE meth);
13830 13852
13831 13853 ins_cost(300);
13832 13854 format %{ "CALL_LEAF,runtime " %}
13833 13855 opcode(0xE8); /* E8 cd */
// Leaf call: FPU stack is cleared before the call and verified after it.
13834 13856 ins_encode( pre_call_FPU,
13835 13857 FFree_Float_Stack_All,
13836 13858 Java_To_Runtime( meth ),
13837 13859 Verify_FPU_For_Leaf, post_call_FPU );
13838 13860 ins_pipe( pipe_slow );
13839 13861 ins_pc_relative(1);
13840 13862 %}
13841 13863
// Leaf runtime call that is known not to touch the FPU: no pre/post FPU
// bookkeeping at all, just the raw call.
13842 13864 instruct CallLeafNoFPDirect(method meth) %{
13843 13865 match(CallLeafNoFP);
13844 13866 effect(USE meth);
13845 13867
13846 13868 ins_cost(300);
13847 13869 format %{ "CALL_LEAF_NOFP,runtime " %}
13848 13870 opcode(0xE8); /* E8 cd */
13849 13871 ins_encode(Java_To_Runtime(meth));
13850 13872 ins_pipe( pipe_slow );
13851 13873 ins_pc_relative(1);
13852 13874 %}
13853 13875
13854 13876
13855 13877 // Return Instruction
13856 13878 // Remove the return address & jump to it.
13857 13879 instruct Ret() %{
13858 13880 match(Return);
13859 13881 format %{ "RET" %}
// 0xC3 = x86 near RET; single opcode byte, no operands.
13860 13882 opcode(0xC3);
13861 13883 ins_encode(OpcP);
13862 13884 ins_pipe( pipe_jmp );
13863 13885 %}
13864 13886
13865 13887 // Tail Call; Jump from runtime stub to Java code.
13866 13888 // Also known as an 'interprocedural jump'.
13867 13889 // Target of jump will eventually return to caller.
13868 13890 // TailJump below removes the return address.
13869 13891 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13870 13892 match(TailCall jump_target method_oop );
13871 13893 ins_cost(300);
13872 13894 format %{ "JMP $jump_target \t# EBX holds method oop" %}
// FF /4 = indirect JMP through a register; return address stays on the stack.
13873 13895 opcode(0xFF, 0x4); /* Opcode FF /4 */
13874 13896 ins_encode( OpcP, RegOpc(jump_target) );
13875 13897 ins_pipe( pipe_jmp );
13876 13898 %}
13877 13899
13878 13900
13879 13901 // Tail Jump; remove the return address; jump to target.
13880 13902 // TailCall above leaves the return address around.
13881 13903 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13882 13904 match( TailJump jump_target ex_oop );
13883 13905 ins_cost(300);
// Pops the return address into EDX (discarded) before the indirect jump;
// the exception oop travels in EAX per the operand constraint.
13884 13906 format %{ "POP EDX\t# pop return address into dummy\n\t"
13885 13907 "JMP $jump_target " %}
13886 13908 opcode(0xFF, 0x4); /* Opcode FF /4 */
13887 13909 ins_encode( enc_pop_rdx,
13888 13910 OpcP, RegOpc(jump_target) );
13889 13911 ins_pipe( pipe_jmp );
13890 13912 %}
13891 13913
13892 13914 // Create exception oop: created by stack-crawling runtime code.
13893 13915 // Created exception is now available to this handler, and is setup
13894 13916 // just prior to jumping to this handler. No code emitted.
13895 13917 instruct CreateException( eAXRegP ex_oop )
13896 13918 %{
13897 13919 match(Set ex_oop (CreateEx));
13898 13920
// size(0): a register-allocation artifact only — the runtime has already
// deposited the oop in EAX before control reaches here.
13899 13921 size(0);
13900 13922 // use the following format syntax
13901 13923 format %{ "# exception oop is in EAX; no code emitted" %}
13902 13924 ins_encode();
13903 13925 ins_pipe( empty );
13904 13926 %}
13905 13927
13906 13928
13907 13929 // Rethrow exception:
13908 13930 // The exception oop will come in the first argument position.
13909 13931 // Then JUMP (not call) to the rethrow stub code.
13910 13932 instruct RethrowException()
13911 13933 %{
13912 13934 match(Rethrow);
13913 13935
13914 13936 // use the following format syntax
13915 13937 format %{ "JMP rethrow_stub" %}
// enc_rethrow (defined elsewhere in this file) presumably emits the jump to the
// shared rethrow stub — confirm in the encode block.
13916 13938 ins_encode(enc_rethrow);
13917 13939 ins_pipe( pipe_jmp );
13918 13940 %}
13919 13941
13920 13942 // inlined locking and unlocking
13921 13943
13922 13944
// Inline fast-path monitor enter; flags hold the success/failure result.
13923 13945 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13924 13946 match( Set cr (FastLock object box) );
// tmp is pinned to EAX (eAXRegI) and both temps are clobbered, as the format notes.
13925 13947 effect( TEMP tmp, TEMP scr );
13926 13948 ins_cost(300);
13927 13949 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13928 13950 ins_encode( Fast_Lock(object,box,tmp,scr) );
13929 13951 ins_pipe( pipe_slow );
13930 13952 ins_pc_relative(1);
13931 13953 %}
13932 13954
// Inline fast-path monitor exit; counterpart of cmpFastLock above.
13933 13955 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13934 13956 match( Set cr (FastUnlock object box) );
// box is pinned to EAX (eAXRegP); tmp is a clobbered scratch register.
13935 13957 effect( TEMP tmp );
13936 13958 ins_cost(300);
13937 13959 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13938 13960 ins_encode( Fast_Unlock(object,box,tmp) );
13939 13961 ins_pipe( pipe_slow );
13940 13962 ins_pc_relative(1);
13941 13963 %}
13942 13964
13943 13965
13944 13966
13945 13967 // ============================================================================
13946 13968 // Safepoint Instruction
13947 13969 instruct safePoint_poll(eFlagsReg cr) %{
13948 13970 match(SafePoint);
// The TST read sets flags, hence KILL cr.
13949 13971 effect(KILL cr);
13950 13972
13951 13973 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13952 13974 // On SPARC that might be acceptable as we can generate the address with
13953 13975 // just a sethi, saving an or. By polling at offset 0 we can end up
13954 13976 // putting additional pressure on the index-0 in the D$. Because of
13955 13977 // alignment (just like the situation at hand) the lower indices tend
13956 13978 // to see more traffic. It'd be better to change the polling address
13957 13979 // to offset 0 of the last $line in the polling page.
13958 13980
// NOTE(review): Safepoint_Poll presumably tests a word of the VM polling page so
// the OS can trap the thread at a safepoint — confirm in the enc_class definition.
13959 13981 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13960 13982 ins_cost(125);
13961 13983 size(6) ;
13962 13984 ins_encode( Safepoint_Poll() );
13963 13985 ins_pipe( ialu_reg_mem );
13964 13986 %}
13965 13987
13966 13988 //----------PEEPHOLE RULES-----------------------------------------------------
13967 13989 // These must follow all instruction definitions as they use the names
13968 13990 // defined in the instructions definitions.
13969 13991 //
13970 13992 // peepmatch ( root_instr_name [preceding_instruction]* );
13971 13993 //
13972 13994 // peepconstraint %{
13973 13995 // (instruction_number.operand_name relational_op instruction_number.operand_name
13974 13996 // [, ...] );
13975 13997 // // instruction numbers are zero-based using left to right order in peepmatch
13976 13998 //
13977 13999 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13978 14000 // // provide an instruction_number.operand_name for each operand that appears
13979 14001 // // in the replacement instruction's match rule
13980 14002 //
13981 14003 // ---------VM FLAGS---------------------------------------------------------
13982 14004 //
13983 14005 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13984 14006 //
13985 14007 // Each peephole rule is given an identifying number starting with zero and
13986 14008 // increasing by one in the order seen by the parser. An individual peephole
13987 14009 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13988 14010 // on the command-line.
13989 14011 //
13990 14012 // ---------CURRENT LIMITATIONS----------------------------------------------
13991 14013 //
13992 14014 // Only match adjacent instructions in same basic block
13993 14015 // Only equality constraints
13994 14016 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13995 14017 // Only one replacement instruction
13996 14018 //
13997 14019 // ---------EXAMPLE----------------------------------------------------------
13998 14020 //
13999 14021 // // pertinent parts of existing instructions in architecture description
14000 14022 // instruct movI(eRegI dst, eRegI src) %{
14001 14023 // match(Set dst (CopyI src));
14002 14024 // %}
14003 14025 //
14004 14026 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14005 14027 // match(Set dst (AddI dst src));
14006 14028 // effect(KILL cr);
14007 14029 // %}
14008 14030 //
14009 14031 // // Change (inc mov) to lea
14010 14032 // peephole %{
14011 14033 // // increment preceeded by register-register move
14012 14034 // peepmatch ( incI_eReg movI );
14013 14035 // // require that the destination register of the increment
14014 14036 // // match the destination register of the move
14015 14037 // peepconstraint ( 0.dst == 1.dst );
14016 14038 // // construct a replacement instruction that sets
14017 14039 // // the destination to ( move's source register + one )
14018 14040 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14019 14041 // %}
14020 14042 //
14021 14043 // Implementation no longer uses movX instructions since
14022 14044 // machine-independent system no longer uses CopyX nodes.
14023 14045 //
14024 14046 // peephole %{
14025 14047 // peepmatch ( incI_eReg movI );
14026 14048 // peepconstraint ( 0.dst == 1.dst );
14027 14049 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14028 14050 // %}
14029 14051 //
14030 14052 // peephole %{
14031 14053 // peepmatch ( decI_eReg movI );
14032 14054 // peepconstraint ( 0.dst == 1.dst );
14033 14055 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14034 14056 // %}
14035 14057 //
14036 14058 // peephole %{
14037 14059 // peepmatch ( addI_eReg_imm movI );
14038 14060 // peepconstraint ( 0.dst == 1.dst );
14039 14061 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14040 14062 // %}
14041 14063 //
14042 14064 // peephole %{
14043 14065 // peepmatch ( addP_eReg_imm movP );
14044 14066 // peepconstraint ( 0.dst == 1.dst );
14045 14067 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14046 14068 // %}
14047 14069
14048 14070 // // Change load of spilled value to only a spill
14049 14071 // instruct storeI(memory mem, eRegI src) %{
14050 14072 // match(Set mem (StoreI mem src));
14051 14073 // %}
14052 14074 //
14053 14075 // instruct loadI(eRegI dst, memory mem) %{
14054 14076 // match(Set dst (LoadI mem));
14055 14077 // %}
14056 14078 //
// Peephole: a load of a value just stored back from the same memory slot is
// redundant — drop the load and keep only the store (see example rules above).
14057 14079 peephole %{
14058 14080 peepmatch ( loadI storeI );
14059 14081 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14060 14082 peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14061 14083 %}
14062 14084
14063 14085 //----------SMARTSPILL RULES---------------------------------------------------
14064 14086 // These must follow all instruction definitions as they use the names
14065 14087 // defined in the instructions definitions.
↓ open down ↓ |
1104 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX