Print this page
rev 1839 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
Split |
Close |
Expand all |
Collapse all |
--- old/src/cpu/x86/vm/x86_32.ad
+++ new/src/cpu/x86/vm/x86_32.ad
1 1 //
2 2 // Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 //
5 5 // This code is free software; you can redistribute it and/or modify it
6 6 // under the terms of the GNU General Public License version 2 only, as
7 7 // published by the Free Software Foundation.
8 8 //
9 9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 // version 2 for more details (a copy is included in the LICENSE file that
13 13 // accompanied this code).
14 14 //
15 15 // You should have received a copy of the GNU General Public License version
16 16 // 2 along with this work; if not, write to the Free Software Foundation,
17 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 //
19 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 // or visit www.oracle.com if you need additional information or have any
21 21 // questions.
22 22 //
23 23 //
24 24
25 25 // X86 Architecture Description File
26 26
27 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 28 // This information is used by the matcher and the register allocator to
29 29 // describe individual registers and classes of registers within the target
30 30 // architecture.
31 31
32 32 register %{
33 33 //----------Architecture Description Register Definitions----------------------
34 34 // General Registers
35 35 // "reg_def" name ( register save type, C convention save type,
36 36 // ideal register type, encoding );
37 37 // Register Save Types:
38 38 //
39 39 // NS = No-Save: The register allocator assumes that these registers
40 40 // can be used without saving upon entry to the method, &
41 41 // that they do not need to be saved at call sites.
42 42 //
43 43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 44 // can be used without saving upon entry to the method,
45 45 // but that they must be saved at call sites.
46 46 //
47 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 48 // must be saved before using them upon entry to the
49 49 // method, but they do not need to be saved at call
50 50 // sites.
51 51 //
52 52 // AS = Always-Save: The register allocator assumes that these registers
53 53 // must be saved before using them upon entry to the
54 54 // method, & that they must be saved at call sites.
55 55 //
56 56 // Ideal Register Type is used to determine how to save & restore a
57 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 59 //
60 60 // The encoding number is the actual bit-pattern placed into the opcodes.
61 61
62 62 // General Registers
63 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
66 66
67 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
76 76
77 77 // Special Registers
78 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79 79
80 80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 81 // allocator, and only shows up in the encodings.
82 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 84 // Ok so here's the trick FPR1 is really st(0) except in the midst
85 85 // of emission of assembly for a machnode. During the emission the fpu stack
86 86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 87 // the stack will not have this element so FPR1 == st(0) from the
88 88 // oopMap viewpoint. This same weirdness with numbering causes
89 89 // instruction encoding to have to play games with the register
90 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 91 // where it does flt->flt moves to see an example
92 92 //
93 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107 107
108 108 // XMM registers. 128-bit registers of 4 words each, labeled a-d.
109 109 // Word a in each register holds a Float, words ab hold a Double.
110 110 // We currently do not use the SIMD capabilities, so registers cd
111 111 // are unused at the moment.
112 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128 128
129 129 // Specify priority of register selection within phases of register
130 130 // allocation. Highest priority is first. A useful heuristic is to
131 131 // give registers a low priority when they are required by machine
132 132 // instructions, like EAX and EDX. Registers which are used as
133 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 134 // For the Intel integer registers, the equivalent Long pairs are
135 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
136 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 139 FPR6L, FPR6H, FPR7L, FPR7H );
140 140
141 141 alloc_class chunk1( XMM0a, XMM0b,
142 142 XMM1a, XMM1b,
143 143 XMM2a, XMM2b,
144 144 XMM3a, XMM3b,
145 145 XMM4a, XMM4b,
146 146 XMM5a, XMM5b,
147 147 XMM6a, XMM6b,
148 148 XMM7a, XMM7b, EFLAGS);
149 149
150 150
151 151 //----------Architecture Description Register Classes--------------------------
152 152 // Several register classes are automatically defined based upon information in
153 153 // this architecture description.
154 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158 158 //
159 159 // Class for all registers
160 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 161 // Class for general registers
162 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 163 // Class for general registers which may be used for implicit null checks on win95
164 164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 166 // Class of "X" registers
167 167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 168 // Class of registers that can appear in an address with no offset.
169 169 // EBP and ESP require an extra instruction byte for zero offset.
170 170 // Used in fast-unlock
171 171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 172 // Class for general registers not including ECX
173 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 174 // Class for general registers not including EAX
175 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 176 // Class for general registers not including EAX or EBX.
177 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 178 // Class of EAX (for multiply and divide operations)
179 179 reg_class eax_reg(EAX);
180 180 // Class of EBX (for atomic add)
181 181 reg_class ebx_reg(EBX);
182 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 183 reg_class ecx_reg(ECX);
184 184 // Class of EDX (for multiply and divide operations)
185 185 reg_class edx_reg(EDX);
186 186 // Class of EDI (for synchronization)
187 187 reg_class edi_reg(EDI);
188 188 // Class of ESI (for synchronization)
189 189 reg_class esi_reg(ESI);
190 190 // Singleton class for interpreter's stack pointer
191 191 reg_class ebp_reg(EBP);
192 192 // Singleton class for stack pointer
193 193 reg_class sp_reg(ESP);
194 194 // Singleton class for instruction pointer
195 195 // reg_class ip_reg(EIP);
196 196 // Singleton class for condition codes
197 197 reg_class int_flags(EFLAGS);
198 198 // Class of integer register pairs
199 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 200 // Class of integer register pairs that aligns with calling convention
201 201 reg_class eadx_reg( EAX,EDX );
202 202 reg_class ebcx_reg( ECX,EBX );
203 203 // Not AX or DX, used in divides
204 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205 205
206 206 // Floating point registers. Notice FPR0 is not a choice.
207 207 // FPR0 is not ever allocated; we use clever encodings to fake
208 208 // a 2-address instruction out of Intel's FP stack.
209 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210 210
211 211 // make a register class for SSE registers
212 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213 213
214 214 // make a double register class for SSE2 registers
215 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217 217
218 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220 220 FPR7L,FPR7H );
221 221
222 222 reg_class flt_reg0( FPR1L );
223 223 reg_class dbl_reg0( FPR1L,FPR1H );
224 224 reg_class dbl_reg1( FPR2L,FPR2H );
225 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227 227
228 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 229 // double values for SSE2.
230 230 reg_class xdb_reg6( XMM6a,XMM6b );
231 231 reg_class xdb_reg7( XMM7a,XMM7b );
232 232 %}
233 233
234 234
235 235 //----------SOURCE BLOCK-------------------------------------------------------
236 236 // This is a block of C++ code which provides values, functions, and
237 237 // definitions necessary in the rest of the architecture description
238 238 source_hpp %{
239 239 // Must be visible to the DFA in dfa_x86_32.cpp
240 240 extern bool is_operand_hi32_zero(Node* n);
241 241 %}
242 242
243 243 source %{
244 244 #define RELOC_IMM32 Assembler::imm_operand
245 245 #define RELOC_DISP32 Assembler::disp32_operand
246 246
247 247 #define __ _masm.
248 248
249 249 // How to find the high register of a Long pair, given the low register
250 250 #define HIGH_FROM_LOW(x) ((x)+2)
251 251
252 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 254 // fast versions of NegF/NegD and AbsF/AbsD.
255 255
256 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
257 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 259 // of 128-bits operands for SSE instructions.
260 260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 261 // Store the value to a 128-bits operand.
262 262 operand[0] = lo;
263 263 operand[1] = hi;
264 264 return operand;
265 265 }
266 266
267 267 // Buffer for 128-bits masks used by SSE instructions.
268 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269 269
270 270 // Static initialization during VM startup.
271 271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275 275
276 276 // Offset hacking within calls.
277 277 static int pre_call_FPU_size() {
278 278 if (Compile::current()->in_24_bit_fp_mode())
279 279 return 6; // fldcw
280 280 return 0;
281 281 }
282 282
283 283 static int preserve_SP_size() {
284 284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 285 }
286 286
287 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 288 // from the start of the call to the point where the return address
289 289 // will point.
290 290 int MachCallStaticJavaNode::ret_addr_offset() {
291 291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 292 if (_method_handle_invoke)
293 293 offset += preserve_SP_size();
294 294 return offset;
295 295 }
296 296
297 297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 299 }
300 300
301 301 static int sizeof_FFree_Float_Stack_All = -1;
302 302
303 303 int MachCallRuntimeNode::ret_addr_offset() {
304 304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306 306 }
307 307
308 308 // Indicate if the safepoint node needs the polling page as an input.
309 309 // Since x86 does have absolute addressing, it doesn't.
310 310 bool SafePointNode::needs_polling_address_input() {
311 311 return false;
312 312 }
313 313
314 314 //
315 315 // Compute padding required for nodes which need alignment
316 316 //
317 317
318 318 // The address of the call instruction needs to be 4-byte aligned to
319 319 // ensure that it does not span a cache line so that it can be patched.
320 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 322 current_offset += 1; // skip call opcode byte
323 323 return round_to(current_offset, alignment_required()) - current_offset;
324 324 }
325 325
326 326 // The address of the call instruction needs to be 4-byte aligned to
327 327 // ensure that it does not span a cache line so that it can be patched.
328 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 331 current_offset += 1; // skip call opcode byte
332 332 return round_to(current_offset, alignment_required()) - current_offset;
333 333 }
334 334
335 335 // The address of the call instruction needs to be 4-byte aligned to
336 336 // ensure that it does not span a cache line so that it can be patched.
337 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 339 current_offset += 5; // skip MOV instruction
340 340 current_offset += 1; // skip call opcode byte
341 341 return round_to(current_offset, alignment_required()) - current_offset;
342 342 }
343 343
344 344 #ifndef PRODUCT
345 345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
346 346 st->print("INT3");
347 347 }
348 348 #endif
349 349
350 350 // EMIT_RM()
351 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
352 352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
353 353 cbuf.insts()->emit_int8(c);
354 354 }
355 355
356 356 // EMIT_CC()
357 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
358 358 unsigned char c = (unsigned char)( f1 | f2 );
359 359 cbuf.insts()->emit_int8(c);
360 360 }
361 361
362 362 // EMIT_OPCODE()
363 363 void emit_opcode(CodeBuffer &cbuf, int code) {
364 364 cbuf.insts()->emit_int8((unsigned char) code);
365 365 }
366 366
367 367 // EMIT_OPCODE() w/ relocation information
368 368 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
369 369 cbuf.relocate(cbuf.insts_mark() + offset, reloc);
370 370 emit_opcode(cbuf, code);
371 371 }
372 372
373 373 // EMIT_D8()
374 374 void emit_d8(CodeBuffer &cbuf, int d8) {
375 375 cbuf.insts()->emit_int8((unsigned char) d8);
376 376 }
377 377
378 378 // EMIT_D16()
379 379 void emit_d16(CodeBuffer &cbuf, int d16) {
380 380 cbuf.insts()->emit_int16(d16);
381 381 }
382 382
383 383 // EMIT_D32()
384 384 void emit_d32(CodeBuffer &cbuf, int d32) {
385 385 cbuf.insts()->emit_int32(d32);
386 386 }
387 387
388 388 // emit 32 bit value and construct relocation entry from relocInfo::relocType
389 389 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
390 390 int format) {
391 391 cbuf.relocate(cbuf.insts_mark(), reloc, format);
392 392 cbuf.insts()->emit_int32(d32);
393 393 }
394 394
395 395 // emit 32 bit value and construct relocation entry from RelocationHolder
396 396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
397 397 int format) {
398 398 #ifdef ASSERT
399 399 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
400 400 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
401 401 }
402 402 #endif
403 403 cbuf.relocate(cbuf.insts_mark(), rspec, format);
404 404 cbuf.insts()->emit_int32(d32);
405 405 }
406 406
407 407 // Access stack slot for load or store
408 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
409 409 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
410 410 if( -128 <= disp && disp <= 127 ) {
411 411 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
412 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
413 413 emit_d8 (cbuf, disp); // Displacement // R/M byte
414 414 } else {
415 415 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
416 416 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
417 417 emit_d32(cbuf, disp); // Displacement // R/M byte
418 418 }
419 419 }
420 420
421 421 // eRegI ereg, memory mem) %{ // emit_reg_mem
422 422 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
423 423 // There is no index & no scale, use form without SIB byte
424 424 if ((index == 0x4) &&
425 425 (scale == 0) && (base != ESP_enc)) {
426 426 // If no displacement, mode is 0x0; unless base is [EBP]
427 427 if ( (displace == 0) && (base != EBP_enc) ) {
428 428 emit_rm(cbuf, 0x0, reg_encoding, base);
429 429 }
430 430 else { // If 8-bit displacement, mode 0x1
431 431 if ((displace >= -128) && (displace <= 127)
432 432 && !(displace_is_oop) ) {
433 433 emit_rm(cbuf, 0x1, reg_encoding, base);
434 434 emit_d8(cbuf, displace);
435 435 }
436 436 else { // If 32-bit displacement
437 437 if (base == -1) { // Special flag for absolute address
438 438 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
439 439 // (manual lies; no SIB needed here)
440 440 if ( displace_is_oop ) {
441 441 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
442 442 } else {
443 443 emit_d32 (cbuf, displace);
444 444 }
445 445 }
446 446 else { // Normal base + offset
447 447 emit_rm(cbuf, 0x2, reg_encoding, base);
448 448 if ( displace_is_oop ) {
449 449 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
450 450 } else {
451 451 emit_d32 (cbuf, displace);
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457 else { // Else, encode with the SIB byte
458 458 // If no displacement, mode is 0x0; unless base is [EBP]
459 459 if (displace == 0 && (base != EBP_enc)) { // If no displacement
460 460 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
461 461 emit_rm(cbuf, scale, index, base);
462 462 }
463 463 else { // If 8-bit displacement, mode 0x1
464 464 if ((displace >= -128) && (displace <= 127)
465 465 && !(displace_is_oop) ) {
466 466 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
467 467 emit_rm(cbuf, scale, index, base);
468 468 emit_d8(cbuf, displace);
469 469 }
470 470 else { // If 32-bit displacement
471 471 if (base == 0x04 ) {
472 472 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
473 473 emit_rm(cbuf, scale, index, 0x04);
474 474 } else {
475 475 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
476 476 emit_rm(cbuf, scale, index, base);
477 477 }
478 478 if ( displace_is_oop ) {
479 479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 480 } else {
481 481 emit_d32 (cbuf, displace);
482 482 }
483 483 }
484 484 }
485 485 }
486 486 }
487 487
488 488
489 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 490 if( dst_encoding == src_encoding ) {
491 491 // reg-reg copy, use an empty encoding
492 492 } else {
493 493 emit_opcode( cbuf, 0x8B );
494 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 495 }
496 496 }
497 497
498 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 499 if( dst_encoding == src_encoding ) {
↓ open down ↓ |
499 lines elided |
↑ open up ↑ |
500 500 // reg-reg copy, use an empty encoding
501 501 } else {
502 502 MacroAssembler _masm(&cbuf);
503 503
504 504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 505 }
506 506 }
507 507
508 508
509 509 //=============================================================================
510 +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511 +
512 +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
513 + // Empty encoding
514 +}
515 +
516 +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
517 + return 0;
518 +}
519 +
520 +#ifndef PRODUCT
521 +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
522 + st->print("# MachConstantBaseNode (empty encoding)");
523 +}
524 +#endif
525 +
526 +
527 +//=============================================================================
510 528 #ifndef PRODUCT
511 529 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
512 530 Compile* C = ra_->C;
513 531 if( C->in_24_bit_fp_mode() ) {
514 532 st->print("FLDCW 24 bit fpu control word");
515 533 st->print_cr(""); st->print("\t");
516 534 }
517 535
518 536 int framesize = C->frame_slots() << LogBytesPerInt;
519 537 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
520 538 // Remove two words for return addr and rbp,
521 539 framesize -= 2*wordSize;
522 540
523 541 // Calls to C2R adapters often do not accept exceptional returns.
524 542 // We require that their callers must bang for them. But be careful, because
525 543 // some VM calls (such as call site linkage) can use several kilobytes of
526 544 // stack. But the stack safety zone should account for that.
527 545 // See bugs 4446381, 4468289, 4497237.
528 546 if (C->need_stack_bang(framesize)) {
529 547 st->print_cr("# stack bang"); st->print("\t");
530 548 }
531 549 st->print_cr("PUSHL EBP"); st->print("\t");
532 550
533 551 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
534 552 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
535 553 st->print_cr(""); st->print("\t");
536 554 framesize -= wordSize;
537 555 }
538 556
539 557 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
540 558 if (framesize) {
541 559 st->print("SUB ESP,%d\t# Create frame",framesize);
542 560 }
543 561 } else {
544 562 st->print("SUB ESP,%d\t# Create frame",framesize);
545 563 }
546 564 }
547 565 #endif
548 566
549 567
550 568 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
551 569 Compile* C = ra_->C;
552 570
553 571 if (UseSSE >= 2 && VerifyFPU) {
554 572 MacroAssembler masm(&cbuf);
555 573 masm.verify_FPU(0, "FPU stack must be clean on entry");
556 574 }
557 575
558 576 // WARNING: Initial instruction MUST be 5 bytes or longer so that
559 577 // NativeJump::patch_verified_entry will be able to patch out the entry
560 578 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
561 579 // depth is ok at 5 bytes, the frame allocation can be either 3 or
562 580 // 6 bytes. So if we don't do the fldcw or the push then we must
563 581 // use the 6 byte frame allocation even if we have no frame. :-(
564 582 // If method sets FPU control word do it now
565 583 if( C->in_24_bit_fp_mode() ) {
566 584 MacroAssembler masm(&cbuf);
567 585 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
568 586 }
569 587
570 588 int framesize = C->frame_slots() << LogBytesPerInt;
571 589 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
572 590 // Remove two words for return addr and rbp,
573 591 framesize -= 2*wordSize;
574 592
575 593 // Calls to C2R adapters often do not accept exceptional returns.
576 594 // We require that their callers must bang for them. But be careful, because
577 595 // some VM calls (such as call site linkage) can use several kilobytes of
578 596 // stack. But the stack safety zone should account for that.
579 597 // See bugs 4446381, 4468289, 4497237.
580 598 if (C->need_stack_bang(framesize)) {
581 599 MacroAssembler masm(&cbuf);
582 600 masm.generate_stack_overflow_check(framesize);
583 601 }
584 602
585 603 // We always push rbp, so that on return to interpreter rbp, will be
586 604 // restored correctly and we can correct the stack.
587 605 emit_opcode(cbuf, 0x50 | EBP_enc);
588 606
589 607 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
590 608 emit_opcode(cbuf, 0x68); // push 0xbadb100d
591 609 emit_d32(cbuf, 0xbadb100d);
592 610 framesize -= wordSize;
593 611 }
594 612
595 613 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
596 614 if (framesize) {
597 615 emit_opcode(cbuf, 0x83); // sub SP,#framesize
598 616 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
599 617 emit_d8(cbuf, framesize);
600 618 }
601 619 } else {
602 620 emit_opcode(cbuf, 0x81); // sub SP,#framesize
603 621 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
604 622 emit_d32(cbuf, framesize);
605 623 }
606 624 C->set_frame_complete(cbuf.insts_size());
607 625
608 626 #ifdef ASSERT
609 627 if (VerifyStackAtCalls) {
610 628 Label L;
611 629 MacroAssembler masm(&cbuf);
612 630 masm.push(rax);
613 631 masm.mov(rax, rsp);
614 632 masm.andptr(rax, StackAlignmentInBytes-1);
615 633 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
616 634 masm.pop(rax);
617 635 masm.jcc(Assembler::equal, L);
618 636 masm.stop("Stack is not properly aligned!");
619 637 masm.bind(L);
620 638 }
621 639 #endif
622 640
623 641 }
624 642
625 643 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
626 644 return MachNode::size(ra_); // too many variables; just compute it the hard way
627 645 }
628 646
629 647 int MachPrologNode::reloc() const {
630 648 return 0; // a large enough number
631 649 }
632 650
633 651 //=============================================================================
634 652 #ifndef PRODUCT
635 653 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
636 654 Compile *C = ra_->C;
637 655 int framesize = C->frame_slots() << LogBytesPerInt;
638 656 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
639 657 // Remove two words for return addr and rbp,
640 658 framesize -= 2*wordSize;
641 659
642 660 if( C->in_24_bit_fp_mode() ) {
643 661 st->print("FLDCW standard control word");
644 662 st->cr(); st->print("\t");
645 663 }
646 664 if( framesize ) {
647 665 st->print("ADD ESP,%d\t# Destroy frame",framesize);
648 666 st->cr(); st->print("\t");
649 667 }
650 668 st->print_cr("POPL EBP"); st->print("\t");
651 669 if( do_polling() && C->is_method_compilation() ) {
652 670 st->print("TEST PollPage,EAX\t! Poll Safepoint");
653 671 st->cr(); st->print("\t");
654 672 }
655 673 }
656 674 #endif
657 675
658 676 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
659 677 Compile *C = ra_->C;
660 678
661 679 // If method set FPU control word, restore to standard control word
662 680 if( C->in_24_bit_fp_mode() ) {
663 681 MacroAssembler masm(&cbuf);
664 682 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
665 683 }
666 684
667 685 int framesize = C->frame_slots() << LogBytesPerInt;
668 686 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
669 687 // Remove two words for return addr and rbp,
670 688 framesize -= 2*wordSize;
671 689
672 690 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
673 691
674 692 if( framesize >= 128 ) {
675 693 emit_opcode(cbuf, 0x81); // add SP, #framesize
676 694 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
677 695 emit_d32(cbuf, framesize);
678 696 }
679 697 else if( framesize ) {
680 698 emit_opcode(cbuf, 0x83); // add SP, #framesize
681 699 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
682 700 emit_d8(cbuf, framesize);
683 701 }
684 702
685 703 emit_opcode(cbuf, 0x58 | EBP_enc);
686 704
687 705 if( do_polling() && C->is_method_compilation() ) {
688 706 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
689 707 emit_opcode(cbuf,0x85);
690 708 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
691 709 emit_d32(cbuf, (intptr_t)os::get_polling_page());
692 710 }
693 711 }
694 712
// Compute the exact byte size of the epilog emitted above:
// 6 bytes for the optional FLDCW, 6 for the optional return poll
// (TEST opcode + modrm + 4-byte address), 1 for POP EBP, and 6 or 3
// for the 32-bit / 8-bit immediate ADD ESP forms respectively.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if( do_polling() && C->is_method_compilation() ) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6; // opcode + modrm + d32
  } else {
    size += framesize ? 3 : 0; // opcode + modrm + d8 (nothing if frame is empty)
  }
  return size;
}
715 733
// Upper bound on relocation entries needed by the epilog.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

// The epilog uses the default pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Offset of the safepoint poll within this node; 0 here.
// NOTE(review): presumably unused unless the poll is the node's first
// instruction — confirm against callers of safepoint_offset().
int MachEpilogNode::safepoint_offset() const { return 0; }
725 743
726 744 //=============================================================================
727 745
// Register classes used by the spill-copy logic: invalid, general-purpose
// integer register, x87 float register, XMM register, or stack slot.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Classify an allocator register name into one of the classes above.
// Order matters: validity first, then stack slots, then machine registers.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    // x87 registers are only allocated when SSE2 is unavailable.
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
743 761
// Tri-modal helper for a single reg<->[ESP+offset] instruction:
// with 'cbuf' it emits bytes; with no buffer and !do_size it prints assembly
// to 'st' (non-PRODUCT builds); in all modes it returns the accumulated code
// size so the size() methods can stay in sync with emission.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + modrm + SIB = 3 bytes, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
763 781
// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emits/prints/sizes MOVSD/MOVLPD (double) or MOVSS (single) between an XMM
// register and [ESP+offset]; same tri-modal convention as impl_helper().
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    // Adjacent hi/lo register pair means a 64-bit (double) move.
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 ); // MOVSS prefix
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // prefix + 0F + opcode + modrm + SIB = 5 bytes, plus displacement.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
802 820
803 821
// XMM-to-XMM register copy.  Uses MOVAPS/MOVAPD when the platform prefers
// whole-register moves (UseXmmRegToRegMoveAll), else MOVSS/MOVSD.
// Same tri-modal emit/print/size convention as the helpers above.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 ); // MOVAPD needs the 0x66 prefix
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // MOVAPD = 4 bytes (with prefix), MOVAPS = 3 bytes.
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // prefix + 0F + opcode + modrm = 4 bytes for both MOVSD and MOVSS.
    return size+4;
  }
}
844 862
// Copy a 32-bit general-purpose register into an XMM register (MOVD xmm,gpr;
// encoding 66 0F 6E /r).  NOTE(review): unlike the other impl_* helpers this
// returns a fixed 4 rather than size+4 and ignores the incoming 'size' —
// callers only use it as the sole/terminal move, so the accumulated size is
// zero at that point; verify before reusing in a new context.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4; // 66 + 0F + 6E + modrm
}
860 878
861 879
// Copy an XMM register into a 32-bit general-purpose register (MOVD gpr,xmm;
// encoding 66 0F 7E /r — note the reg field carries the XMM source).
// Same fixed-size-4 convention (and caveat) as impl_movgpr2x_helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4; // 66 + 0F + 7E + modrm
}
877 895
// Integer register-to-register copy (MOV dst,src; 8B /r).
// Tri-modal emit/print/size like the other impl_* helpers.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + modrm
}
890 908
// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0), an FLD first pushes it to the top of the stack, and the store then
// pops (FSTP); if it is ST(0), a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // Encode store-with-pop vs plain store via the register operand passed to
  // impl_helper (EBX_num/EDX_num select the /3 vs /2 opcode extension).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
920 938
// Emit (cbuf != NULL), print (cbuf == NULL, !do_size), or size (do_size) a
// spill copy between any combination of integer registers, x87 registers,
// XMM registers and stack slots.  Dispatches on the register classes of the
// low words, handling the high words (for 64-bit values) inline or in the
// "second bits" section at the end.  The order of the checks is significant:
// each case returns or falls through deliberately.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the push/pop below does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (opcode + modrm + SIB + disp) followed by the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // No matching case: abort.  NOTE(review): presumably Unimplemented() does
  // not return, so the missing return value here is unreachable — confirm.
  Unimplemented();
}
1127 1145
#ifndef PRODUCT
// Print-only mode of implementation(): no buffer, no sizing.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size-only mode: returns the byte count implementation() would emit.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1141 1159
1142 1160 //=============================================================================
1143 1161 #ifndef PRODUCT
1144 1162 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
1145 1163 st->print("NOP \t# %d bytes pad for loops and calls", _count);
1146 1164 }
1147 1165 #endif
1148 1166
1149 1167 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
1150 1168 MacroAssembler _masm(&cbuf);
1151 1169 __ nop(_count);
1152 1170 }
1153 1171
1154 1172 uint MachNopNode::size(PhaseRegAlloc *) const {
1155 1173 return _count;
1156 1174 }
1157 1175
1158 1176
1159 1177 //=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the stack address of the lock box.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset].  The 8-bit displacement form is used when the
// offset fits in a signed byte, the 32-bit form otherwise.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] (32-bit displacement)
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] (8-bit displacement)
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with emit(): 7 bytes for the 32-bit displacement
// form, 4 bytes for the 8-bit form.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}
1194 1212
1195 1213 //=============================================================================
1196 1214
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark(); // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL) return; // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpretor
// (5-byte movl + 5-byte jmp emitted above)
uint size_java_to_interp() {
  return 10; // movl; jmp
}
// relocation entries for call stub, compiled java to interpretor
uint reloc_java_to_interp() {
  return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1231 1249
1232 1250 //=============================================================================
#ifndef PRODUCT
// Print the inline-cache check performed at the unverified entry point.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Emit the unverified entry point: compare the cached klass (EAX) against
// the receiver's klass (loaded from ECX), jump to the IC-miss stub on
// mismatch, then pad with NOPs so the verified entry point that follows is
// patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size checked by the assert in emit(): one NOP fewer when
// OptoBreakpoint reserves the byte for an int3.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
1265 1283
1266 1284
1267 1285 //=============================================================================
// Size reserved for the exception handler stub.
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}

// Emit exception handler code. Stuff framesize into a register
// and call a VM stub routine.  Returns the handler's offset within the
// stub section, or 0 if the code buffer could not be expanded.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0; // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1293 1311
// Size reserved for the deopt handler stub: a 5-byte pushl of the handler's
// own address followed by a jump.
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization. (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1302 1320
1303 1321 // Emit deopt handler code.
1304 1322 int emit_deopt_handler(CodeBuffer& cbuf) {
1305 1323
1306 1324 // Note that the code buffer's insts_mark is always relative to insts.
1307 1325 // That's why we must use the macroassembler to generate a handler.
1308 1326 MacroAssembler _masm(&cbuf);
1309 1327 address base =
1310 1328 __ start_a_stub(size_exception_handler());
1311 1329 if (base == NULL) return 0; // CodeBuffer::expand failed
1312 1330 int offset = __ offset();
↓ open down ↓ |
793 lines elided |
↑ open up ↑ |
1313 1331 InternalAddress here(__ pc());
1314 1332 __ pushptr(here.addr());
1315 1333
1316 1334 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1317 1335 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1318 1336 __ end_a_stub();
1319 1337 return offset;
1320 1338 }
1321 1339
1322 1340
1323 -static void emit_double_constant(CodeBuffer& cbuf, double x) {
1324 - int mark = cbuf.insts()->mark_off();
1325 - MacroAssembler _masm(&cbuf);
1326 - address double_address = __ double_constant(x);
1327 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1328 - emit_d32_reloc(cbuf,
1329 - (int)double_address,
1330 - internal_word_Relocation::spec(double_address),
1331 - RELOC_DISP32);
1332 -}
1333 -
1334 -static void emit_float_constant(CodeBuffer& cbuf, float x) {
1335 - int mark = cbuf.insts()->mark_off();
1336 - MacroAssembler _masm(&cbuf);
1337 - address float_address = __ float_constant(x);
1338 - cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1339 - emit_d32_reloc(cbuf,
1340 - (int)float_address,
1341 - internal_word_Relocation::spec(float_address),
1342 - RELOC_DISP32);
1343 -}
1344 -
1345 -
1346 1341 const bool Matcher::match_rule_supported(int opcode) {
1347 1342 if (!has_match_rule(opcode))
1348 1343 return false;
1349 1344
1350 1345 return true; // Per default match rules are supported.
1351 1346 }
1352 1347
1353 1348 int Matcher::regnum_to_fpu_offset(int regnum) {
1354 1349 return regnum - 32; // The FP registers are in the second chunk
1355 1350 }
1356 1351
1357 -bool is_positive_zero_float(jfloat f) {
1358 - return jint_cast(f) == jint_cast(0.0F);
1359 -}
1360 -
1361 -bool is_positive_one_float(jfloat f) {
1362 - return jint_cast(f) == jint_cast(1.0F);
1363 -}
1364 -
1365 -bool is_positive_zero_double(jdouble d) {
1366 - return jlong_cast(d) == jlong_cast(0.0);
1367 -}
1368 -
1369 -bool is_positive_one_double(jdouble d) {
1370 - return jlong_cast(d) == jlong_cast(1.0);
1371 -}
1372 -
// True just means we have fast l2f conversion.
// NOTE(review): the "This is UltraSparc specific" wording in the original
// comment was inherited from the SPARC port; on x86 the conversion is
// likewise supported, hence true.
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes: 8 when SSE2 is available, else no vector support.
const uint Matcher::vector_width_in_bytes(void) {
  return UseSSE >= 2 ? 8 : 0;
}

// Vector ideal reg: vectors are carried in double registers.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int offset) {
  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// On x86_32 a 64-bit constant store is never cheaper than two 32-bit stores.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1404 1383
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Narrow oops exist only on 64-bit compressed-oop platforms; calling this
// on x86_32 is a programming error.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1433 1412
1434 1413
// Rewrite the memory operand of 'node' (the operand that input edge 'idx'
// feeds) into its *_win95_safeOper variant.  On Win95/98/ME a fault in the
// EBP-relative addressing forms is not reported as an implicit null check,
// so operands that could use EBP as the base are replaced with equivalents
// that avoid it; operands that can never use EBP are left untouched.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operand list until the operand covering input 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1484 1463
1485 1464 // Advertise here if the CPU requires explicit rounding operations
1486 1465 // to implement the UseStrictFP mode.
1487 1466 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1488 1467 
1489 1468 // Are floats converted to double when stored to stack during deoptimization?
1490 1469 // On x32 it is stored with conversion only when the x87 FPU is used for floats (UseSSE == 0).
1491 1470 bool Matcher::float_in_double() { return (UseSSE == 0); }
1492 1471 
1493 1472 // Do ints take an entire long register or just half?
1494 1473 const bool Matcher::int_in_long = false;
1495 1474
1496 1475 // Return whether or not this register is ever used as an argument. This
1497 1476 // function is used on startup to build the trampoline stubs in generateOptoStub.
1498 1477 // Registers not mentioned will be killed by the VM call in the trampoline, and
1499 1478 // arguments in those registers will not be available to the callee.
1500 1479 bool Matcher::can_be_java_arg( int reg ) {
1501 1480 if( reg == ECX_num || reg == EDX_num ) return true; // integer argument registers
1502 1481 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; // float args need SSE
1503 1482 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; // double halves need SSE2
1504 1483 return false;
1505 1484 }
1506 1485
1507 1486 bool Matcher::is_spillable_arg( int reg ) {
1508 1487 return can_be_java_arg(reg); // exactly the Java argument registers are spillable args
1509 1488 }
1510 1489
1511 1490 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512 1491 // Use the hardware integer DIV instruction when
1513 1492 // it is faster than code which uses multiply.
1514 1493 // Only when the constant divisor fits into 32 bits
1515 1494 // (min_jint is excluded to get only correct
1516 1495 // positive 32 bit values from negative).
1517 1496 return VM_Version::has_fast_idiv() &&
1518 1497 (divisor == (int)divisor && divisor != min_jint);
1519 1498 }
1520 1499
1521 1500 // Register for DIVI projection of divmodI
1522 1501 RegMask Matcher::divI_proj_mask() {
1523 1502 return EAX_REG_mask; // x86 idiv leaves the quotient in EAX
1524 1503 }
1525 1504 
1526 1505 // Register for MODI projection of divmodI
1527 1506 RegMask Matcher::modI_proj_mask() {
1528 1507 return EDX_REG_mask; // x86 idiv leaves the remainder in EDX
1529 1508 }
1530 1509 
1531 1510 // Register for DIVL projection of divmodL
1532 1511 RegMask Matcher::divL_proj_mask() {
1533 1512 ShouldNotReachHere(); // no single-instruction 64-bit div/mod on 32-bit x86
1534 1513 return RegMask();
1535 1514 }
1536 1515 
1537 1516 // Register for MODL projection of divmodL
1538 1517 RegMask Matcher::modL_proj_mask() {
1539 1518 ShouldNotReachHere(); // no single-instruction 64-bit div/mod on 32-bit x86
1540 1519 return RegMask();
1541 1520 }
1542 1521 
1543 1522 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1544 1523 return EBP_REG_mask; // EBP carries the saved SP across method-handle invokes (see preserve_SP/restore_SP)
1545 1524 }
1546 1525
1547 1526 // Returns true if the high 32 bits of the value is known to be zero.
1548 1527 bool is_operand_hi32_zero(Node* n) {
1549 1528 int opc = n->Opcode();
1550 1529 if (opc == Op_LoadUI2L) { // unsigned-int-to-long load zero-extends
1551 1530 return true;
1552 1531 }
1553 1532 if (opc == Op_AndL) { // AND with a constant whose high word is zero clears the high word
1554 1533 Node* o2 = n->in(2);
1555 1534 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1556 1535 return true;
1557 1536 }
1558 1537 }
1559 1538 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { // constant with zero high word
1560 1539 return true;
1561 1540 }
1562 1541 return false; // conservatively assume the high word may be non-zero
1563 1542 }
1564 1543
1565 1544 %}
1566 1545
1567 1546 //----------ENCODING BLOCK-----------------------------------------------------
1568 1547 // This block specifies the encoding classes used by the compiler to output
1569 1548 // byte streams. Encoding classes generate functions which are called by
1570 1549 // Machine Instruction Nodes in order to generate the bit encoding of the
1571 1550 // instruction. Operands specify their base encoding interface with the
1572 1551 // interface keyword. Four interfaces are currently supported:
1573 1552 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1574 1553 // operand to generate a function which returns its register number when
1575 1554 // queried. CONST_INTER causes an operand to generate a function which
1576 1555 // returns the value of the constant when queried. MEMORY_INTER causes an
1577 1556 // operand to generate four functions which return the Base Register, the
1578 1557 // Index Register, the Scale Value, and the Offset Value of the operand when
1579 1558 // queried. COND_INTER causes an operand to generate six functions which
1580 1559 // return the encoding code (ie - encoding bits for the instruction)
1581 1560 // associated with each basic boolean condition for a conditional instruction.
1582 1561 // Instructions specify two basic values for encoding. They use the
1583 1562 // ins_encode keyword to specify their encoding class (which must be one of
1584 1563 // the class names specified in the encoding block), and they use the
1585 1564 // opcode keyword to specify, in order, their primary, secondary, and
1586 1565 // tertiary opcode. Only the opcode sections which a particular instruction
1587 1566 // needs for encoding need to be specified.
1588 1567 encode %{
1589 1568 // Build emit functions for each basic byte or larger field in the intel
1590 1569 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1591 1570 // code in the enc_class source block. Emit functions will live in the
1592 1571 // main source block for now. In future, we can generalize this by
1593 1572 // adding a syntax that specifies the sizes of fields in an order,
1594 1573 // so that the adlc can build the emit functions automagically
1595 1574
1596 1575 // Emit primary opcode
1597 1576 enc_class OpcP %{
1598 1577 emit_opcode(cbuf, $primary);
1599 1578 %}
1600 1579
1601 1580 // Emit secondary opcode
1602 1581 enc_class OpcS %{
1603 1582 emit_opcode(cbuf, $secondary);
1604 1583 %}
1605 1584
1606 1585 // Emit opcode directly
1607 1586 enc_class Opcode(immI d8) %{
1608 1587 emit_opcode(cbuf, $d8$$constant);
1609 1588 %}
1610 1589
1611 1590 enc_class SizePrefix %{
1612 1591 emit_opcode(cbuf,0x66);
1613 1592 %}
1614 1593
1615 1594 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
1616 1595 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1617 1596 %}
1618 1597
1619 1598 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
1620 1599 emit_opcode(cbuf,$opcode$$constant);
1621 1600 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1622 1601 %}
1623 1602
1624 1603 enc_class mov_r32_imm0( eRegI dst ) %{
1625 1604 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1626 1605 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
1627 1606 %}
1628 1607
1629 1608 enc_class cdq_enc %{
1630 1609 // Full implementation of Java idiv and irem; checks for
1631 1610 // special case as described in JVM spec., p.243 & p.271.
1632 1611 //
1633 1612 // normal case special case
1634 1613 //
1635 1614 // input : rax,: dividend min_int
1636 1615 // reg: divisor -1
1637 1616 //
1638 1617 // output: rax,: quotient (= rax, idiv reg) min_int
1639 1618 // rdx: remainder (= rax, irem reg) 0
1640 1619 //
1641 1620 // Code sequence:
1642 1621 //
1643 1622 // 81 F8 00 00 00 80 cmp rax,80000000h
1644 1623 // 0F 85 0B 00 00 00 jne normal_case
1645 1624 // 33 D2 xor rdx,edx
1646 1625 // 83 F9 FF cmp rcx,-1 (0FFh sign-extended from imm8)
1647 1626 // 0F 84 03 00 00 00 je done
1648 1627 // normal_case:
1649 1628 // 99 cdq
1650 1629 // F7 F9 idiv rax,ecx
1651 1630 // done:
1652 1631 //
1653 1632 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1654 1633 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1655 1634 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
1656 1635 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1657 1636 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1658 1637 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
1659 1638 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
1660 1639 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1 (imm8 sign-extended)
1661 1640 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1662 1641 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1663 1642 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
1664 1643 // normal_case:
1665 1644 emit_opcode(cbuf,0x99); // cdq
1666 1645 // idiv (note: must be emitted by the user of this rule)
1667 1646 // normal:
1668 1647 %}
1669 1648
1670 1649 // Dense encoding for older common ops
1671 1650 enc_class Opc_plus(immI opcode, eRegI reg) %{
1672 1651 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1673 1652 %}
1674 1653
1675 1654
1676 1655 // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1677 1656 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1678 1657 // Check for 8-bit immediate, and set sign extend bit in opcode
1679 1658 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1680 1659 emit_opcode(cbuf, $primary | 0x02); // 0x02 selects the sign-extended imm8 form
1681 1660 }
1682 1661 else { // If 32-bit immediate
1683 1662 emit_opcode(cbuf, $primary);
1684 1663 }
1685 1664 %}
1686 1665
1687 1666 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
1688 1667 // Emit primary opcode and set sign-extend bit
1689 1668 // Check for 8-bit immediate, and set sign extend bit in opcode
1690 1669 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1691 1670 emit_opcode(cbuf, $primary | 0x02); }
1692 1671 else { // If 32-bit immediate
1693 1672 emit_opcode(cbuf, $primary);
1694 1673 }
1695 1674 // Emit r/m byte with secondary opcode, after primary opcode.
1696 1675 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1697 1676 %}
1698 1677
1699 1678 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1700 1679 // Check for 8-bit immediate, and set sign extend bit in opcode
1701 1680 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1702 1681 $$$emit8$imm$$constant;
1703 1682 }
1704 1683 else { // If 32-bit immediate
1705 1684 // Output immediate
1706 1685 $$$emit32$imm$$constant;
1707 1686 }
1708 1687 %}
1709 1688
1710 1689 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1711 1690 // Emit primary opcode and set sign-extend bit
1712 1691 // Check for 8-bit immediate, and set sign extend bit in opcode
1713 1692 int con = (int)$imm$$constant; // Throw away top bits
1714 1693 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1715 1694 // Emit r/m byte with secondary opcode, after primary opcode.
1716 1695 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1717 1696 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1718 1697 else emit_d32(cbuf,con);
1719 1698 %}
1720 1699
1721 1700 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1722 1701 // Emit primary opcode and set sign-extend bit
1723 1702 // Check for 8-bit immediate, and set sign extend bit in opcode
1724 1703 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1725 1704 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1726 1705 // Emit r/m byte with tertiary opcode, after primary opcode.
1727 1706 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1728 1707 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1729 1708 else emit_d32(cbuf,con);
1730 1709 %}
1731 1710
1732 1711 enc_class Lbl (label labl) %{ // JMP, CALL
1733 1712 Label *l = $labl$$label;
1734 1713 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1735 1714 %}
1736 1715
1737 1716 enc_class LblShort (label labl) %{ // JMP, CALL
1738 1717 Label *l = $labl$$label;
1739 1718 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1740 1719 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1741 1720 emit_d8(cbuf, disp);
1742 1721 %}
1743 1722
1744 1723 enc_class OpcSReg (eRegI dst) %{ // BSWAP
1745 1724 emit_cc(cbuf, $secondary, $dst$$reg );
1746 1725 %}
1747 1726
1748 1727 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1749 1728 int destlo = $dst$$reg;
1750 1729 int desthi = HIGH_FROM_LOW(destlo);
1751 1730 // bswap lo
1752 1731 emit_opcode(cbuf, 0x0F);
1753 1732 emit_cc(cbuf, 0xC8, destlo);
1754 1733 // bswap hi
1755 1734 emit_opcode(cbuf, 0x0F);
1756 1735 emit_cc(cbuf, 0xC8, desthi);
1757 1736 // xchg lo and hi
1758 1737 emit_opcode(cbuf, 0x87);
1759 1738 emit_rm(cbuf, 0x3, destlo, desthi);
1760 1739 %}
1761 1740
1762 1741 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1763 1742 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1764 1743 %}
1765 1744
1766 1745 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
1767 1746 Label *l = $labl$$label;
1768 1747 $$$emit8$primary;
1769 1748 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1770 1749 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1771 1750 %}
1772 1751
1773 1752 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
1774 1753 Label *l = $labl$$label;
1775 1754 emit_cc(cbuf, $primary, $cop$$cmpcode);
1776 1755 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1777 1756 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1778 1757 emit_d8(cbuf, disp);
1779 1758 %}
1780 1759
1781 1760 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782 1761 $$$emit8$primary;
1783 1762 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784 1763 %}
1785 1764
1786 1765 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1787 1766 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788 1767 emit_d8(cbuf, op >> 8 );
1789 1768 emit_d8(cbuf, op & 255);
1790 1769 %}
1791 1770
1792 1771 // emulate a CMOV with a conditional branch around a MOV
1793 1772 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794 1773 // Invert sense of branch from sense of CMOV
1795 1774 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796 1775 emit_d8( cbuf, $brOffs$$constant );
1797 1776 %}
1798 1777
1799 1778 enc_class enc_PartialSubtypeCheck( ) %{
1800 1779 Register Redi = as_Register(EDI_enc); // result register
1801 1780 Register Reax = as_Register(EAX_enc); // super class
1802 1781 Register Recx = as_Register(ECX_enc); // killed
1803 1782 Register Resi = as_Register(ESI_enc); // sub class
1804 1783 Label miss;
1805 1784
1806 1785 MacroAssembler _masm(&cbuf);
1807 1786 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808 1787 NULL, &miss,
1809 1788 /*set_cond_codes:*/ true);
1810 1789 if ($primary) {
1811 1790 __ xorptr(Redi, Redi);
1812 1791 }
1813 1792 __ bind(miss);
1814 1793 %}
1815 1794
1816 1795 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1817 1796 MacroAssembler masm(&cbuf);
1818 1797 int start = masm.offset();
1819 1798 if (UseSSE >= 2) {
1820 1799 if (VerifyFPU) {
1821 1800 masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822 1801 }
1823 1802 } else {
1824 1803 // External c_calling_convention expects the FPU stack to be 'clean'.
1825 1804 // Compiled code leaves it dirty. Do cleanup now.
1826 1805 masm.empty_FPU_stack();
1827 1806 }
1828 1807 if (sizeof_FFree_Float_Stack_All == -1) {
1829 1808 sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830 1809 } else {
1831 1810 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832 1811 }
1833 1812 %}
1834 1813
1835 1814 enc_class Verify_FPU_For_Leaf %{
1836 1815 if( VerifyFPU ) {
1837 1816 MacroAssembler masm(&cbuf);
1838 1817 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839 1818 }
1840 1819 %}
1841 1820
1842 1821 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843 1822 // This is the instruction starting address for relocation info.
1844 1823 cbuf.set_insts_mark();
1845 1824 $$$emit8$primary;
1846 1825 // CALL directly to the runtime
1847 1826 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848 1827 runtime_call_Relocation::spec(), RELOC_IMM32 );
1849 1828
1850 1829 if (UseSSE >= 2) {
1851 1830 MacroAssembler _masm(&cbuf);
1852 1831 BasicType rt = tf()->return_type();
1853 1832
1854 1833 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855 1834 // A C runtime call where the return value is unused. In SSE2+
1856 1835 // mode the result needs to be removed from the FPU stack. It's
1857 1836 // likely that this function call could be removed by the
1858 1837 // optimizer if the C function is a pure function.
1859 1838 __ ffree(0);
1860 1839 } else if (rt == T_FLOAT) {
1861 1840 __ lea(rsp, Address(rsp, -4));
1862 1841 __ fstp_s(Address(rsp, 0));
1863 1842 __ movflt(xmm0, Address(rsp, 0));
1864 1843 __ lea(rsp, Address(rsp, 4));
1865 1844 } else if (rt == T_DOUBLE) {
1866 1845 __ lea(rsp, Address(rsp, -8));
1867 1846 __ fstp_d(Address(rsp, 0));
1868 1847 __ movdbl(xmm0, Address(rsp, 0));
1869 1848 __ lea(rsp, Address(rsp, 8));
1870 1849 }
1871 1850 }
1872 1851 %}
1873 1852
1874 1853
1875 1854 enc_class pre_call_FPU %{
1876 1855 // If method sets FPU control word restore it here
1877 1856 debug_only(int off0 = cbuf.insts_size());
1878 1857 if( Compile::current()->in_24_bit_fp_mode() ) {
1879 1858 MacroAssembler masm(&cbuf);
1880 1859 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881 1860 }
1882 1861 debug_only(int off1 = cbuf.insts_size());
1883 1862 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
1884 1863 %}
1885 1864
1886 1865 enc_class post_call_FPU %{
1887 1866 // If method sets FPU control word do it here also
1888 1867 if( Compile::current()->in_24_bit_fp_mode() ) {
1889 1868 MacroAssembler masm(&cbuf);
1890 1869 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891 1870 }
1892 1871 %}
1893 1872
1894 1873 enc_class preserve_SP %{
1895 1874 debug_only(int off0 = cbuf.insts_size());
1896 1875 MacroAssembler _masm(&cbuf);
1897 1876 // RBP is preserved across all calls, even compiled calls.
1898 1877 // Use it to preserve RSP in places where the callee might change the SP.
1899 1878 __ movptr(rbp_mh_SP_save, rsp);
1900 1879 debug_only(int off1 = cbuf.insts_size());
1901 1880 assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
1902 1881 %}
1903 1882
1904 1883 enc_class restore_SP %{
1905 1884 MacroAssembler _masm(&cbuf);
1906 1885 __ movptr(rsp, rbp_mh_SP_save);
1907 1886 %}
1908 1887
1909 1888 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
1910 1889 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1911 1890 // who we intended to call.
1912 1891 cbuf.set_insts_mark();
1913 1892 $$$emit8$primary;
1914 1893 if ( !_method ) {
1915 1894 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1916 1895 runtime_call_Relocation::spec(), RELOC_IMM32 );
1917 1896 } else if(_optimized_virtual) {
1918 1897 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919 1898 opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1920 1899 } else {
1921 1900 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1922 1901 static_call_Relocation::spec(), RELOC_IMM32 );
1923 1902 }
1924 1903 if( _method ) { // Emit stub for static call
1925 1904 emit_java_to_interp(cbuf);
1926 1905 }
1927 1906 %}
1928 1907
1929 1908 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
1930 1909 // !!!!!
1931 1910 // Generate "MOV EAX,non_oop_word", a placeholder instruction to load oop-info;
1932 1911 // NOTE(review): presumably patched with the real cached oop when the inline cache is resolved — see oop_Relocation::spec_for_immediate
1933 1912 cbuf.set_insts_mark();
1934 1913 emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1
1935 1914 emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
1936 1915 address virtual_call_oop_addr = cbuf.insts_mark();
1937 1916 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1938 1917 // who we intended to call.
1939 1918 cbuf.set_insts_mark();
1940 1919 $$$emit8$primary;
1941 1920 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1942 1921 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1943 1922 %}
1944 1923
1945 1924 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1946 1925 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1947 1926 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1948 1927
1949 1928 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1950 1929 cbuf.set_insts_mark();
1951 1930 $$$emit8$primary;
1952 1931 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1953 1932 emit_d8(cbuf, disp); // Displacement
1954 1933
1955 1934 %}
1956 1935
1957 1936 enc_class Xor_Reg (eRegI dst) %{
1958 1937 emit_opcode(cbuf, 0x33);
1959 1938 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1960 1939 %}
1961 1940
1962 1941 // Following encoding is no longer used, but may be restored if calling
1963 1942 // convention changes significantly.
1964 1943 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1965 1944 //
1966 1945 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1967 1946 // // int ic_reg = Matcher::inline_cache_reg();
1968 1947 // // int ic_encode = Matcher::_regEncode[ic_reg];
1969 1948 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1970 1949 // // int imo_encode = Matcher::_regEncode[imo_reg];
1971 1950 //
1972 1951 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1973 1952 // // // so we load it immediately before the call
1974 1953 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1975 1954 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1976 1955 //
1977 1956 // // xor rbp,ebp
1978 1957 // emit_opcode(cbuf, 0x33);
1979 1958 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1980 1959 //
1981 1960 // // CALL to interpreter.
1982 1961 // cbuf.set_insts_mark();
1983 1962 // $$$emit8$primary;
1984 1963 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1985 1964 // runtime_call_Relocation::spec(), RELOC_IMM32 );
1986 1965 // %}
1987 1966
1988 1967 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1989 1968 $$$emit8$primary;
1990 1969 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1991 1970 $$$emit8$shift$$constant;
1992 1971 %}
1993 1972
1994 1973 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
1995 1974 // Load immediate does not have a zero or sign extended version
1996 1975 // for 8-bit immediates
1997 1976 emit_opcode(cbuf, 0xB8 + $dst$$reg);
1998 1977 $$$emit32$src$$constant;
1999 1978 %}
2000 1979
2001 1980 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
2002 1981 // Load immediate does not have a zero or sign extended version
2003 1982 // for 8-bit immediates
2004 1983 emit_opcode(cbuf, $primary + $dst$$reg);
2005 1984 $$$emit32$src$$constant;
2006 1985 %}
2007 1986
2008 1987 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
2009 1988 // Load immediate does not have a zero or sign extended version
2010 1989 // for 8-bit immediates
2011 1990 int dst_enc = $dst$$reg;
2012 1991 int src_con = $src$$constant & 0x0FFFFFFFFL;
2013 1992 if (src_con == 0) {
2014 1993 // xor dst, dst
2015 1994 emit_opcode(cbuf, 0x33);
2016 1995 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2017 1996 } else {
2018 1997 emit_opcode(cbuf, $primary + dst_enc);
2019 1998 emit_d32(cbuf, src_con);
2020 1999 }
2021 2000 %}
2022 2001
2023 2002 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2024 2003 // Load immediate does not have a zero or sign extended version
2025 2004 // for 8-bit immediates
2026 2005 int dst_enc = $dst$$reg + 2;
2027 2006 int src_con = ((julong)($src$$constant)) >> 32;
2028 2007 if (src_con == 0) {
↓ open down ↓ |
646 lines elided |
↑ open up ↑ |
2029 2008 // xor dst, dst
2030 2009 emit_opcode(cbuf, 0x33);
2031 2010 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2032 2011 } else {
2033 2012 emit_opcode(cbuf, $primary + dst_enc);
2034 2013 emit_d32(cbuf, src_con);
2035 2014 }
2036 2015 %}
2037 2016
2038 2017
2039 - enc_class LdImmD (immD src) %{ // Load Immediate
2040 - if( is_positive_zero_double($src$$constant)) {
2041 - // FLDZ
2042 - emit_opcode(cbuf,0xD9);
2043 - emit_opcode(cbuf,0xEE);
2044 - } else if( is_positive_one_double($src$$constant)) {
2045 - // FLD1
2046 - emit_opcode(cbuf,0xD9);
2047 - emit_opcode(cbuf,0xE8);
2048 - } else {
2049 - emit_opcode(cbuf,0xDD);
2050 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2051 - emit_double_constant(cbuf, $src$$constant);
2052 - }
2053 - %}
2054 -
2055 -
2056 - enc_class LdImmF (immF src) %{ // Load Immediate
2057 - if( is_positive_zero_float($src$$constant)) {
2058 - emit_opcode(cbuf,0xD9);
2059 - emit_opcode(cbuf,0xEE);
2060 - } else if( is_positive_one_float($src$$constant)) {
2061 - emit_opcode(cbuf,0xD9);
2062 - emit_opcode(cbuf,0xE8);
2063 - } else {
2064 - $$$emit8$primary;
2065 - // Load immediate does not have a zero or sign extended version
2066 - // for 8-bit immediates
2067 - // First load to TOS, then move to dst
2068 - emit_rm(cbuf, 0x0, 0x0, 0x5);
2069 - emit_float_constant(cbuf, $src$$constant);
2070 - }
2071 - %}
2072 -
2073 - enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
2074 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2075 - emit_float_constant(cbuf, $con$$constant);
2076 - %}
2077 -
2078 - enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
2079 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2080 - emit_double_constant(cbuf, $con$$constant);
2081 - %}
2082 -
2083 - enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2084 - // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2085 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2086 - emit_opcode(cbuf, 0x0F);
2087 - emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2088 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2089 - emit_double_constant(cbuf, $con$$constant);
2090 - %}
2091 -
2092 - enc_class Opc_MemImm_F(immF src) %{
2093 - cbuf.set_insts_mark();
2094 - $$$emit8$primary;
2095 - emit_rm(cbuf, 0x0, $secondary, 0x5);
2096 - emit_float_constant(cbuf, $src$$constant);
2097 - %}
2098 -
2099 -
2100 2018 enc_class MovI2X_reg(regX dst, eRegI src) %{
2101 2019 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2102 2020 emit_opcode(cbuf, 0x0F );
2103 2021 emit_opcode(cbuf, 0x6E );
2104 2022 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2105 2023 %}
2106 2024
2107 2025 enc_class MovX2I_reg(eRegI dst, regX src) %{
2108 2026 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2109 2027 emit_opcode(cbuf, 0x0F );
2110 2028 emit_opcode(cbuf, 0x7E );
2111 2029 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2112 2030 %}
2113 2031
2114 2032 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2115 2033 { // MOVD $dst,$src.lo
2116 2034 emit_opcode(cbuf,0x66);
2117 2035 emit_opcode(cbuf,0x0F);
2118 2036 emit_opcode(cbuf,0x6E);
2119 2037 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2120 2038 }
2121 2039 { // MOVD $tmp,$src.hi
2122 2040 emit_opcode(cbuf,0x66);
2123 2041 emit_opcode(cbuf,0x0F);
2124 2042 emit_opcode(cbuf,0x6E);
2125 2043 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2126 2044 }
2127 2045 { // PUNPCKLDQ $dst,$tmp
2128 2046 emit_opcode(cbuf,0x66);
2129 2047 emit_opcode(cbuf,0x0F);
2130 2048 emit_opcode(cbuf,0x62);
2131 2049 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2132 2050 }
2133 2051 %}
2134 2052
2135 2053 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2136 2054 { // MOVD $dst.lo,$src
2137 2055 emit_opcode(cbuf,0x66);
2138 2056 emit_opcode(cbuf,0x0F);
2139 2057 emit_opcode(cbuf,0x7E);
2140 2058 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2141 2059 }
2142 2060 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2143 2061 emit_opcode(cbuf,0xF2);
2144 2062 emit_opcode(cbuf,0x0F);
2145 2063 emit_opcode(cbuf,0x70);
2146 2064 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2147 2065 emit_d8(cbuf, 0x4E);
2148 2066 }
2149 2067 { // MOVD $dst.hi,$tmp
2150 2068 emit_opcode(cbuf,0x66);
2151 2069 emit_opcode(cbuf,0x0F);
2152 2070 emit_opcode(cbuf,0x7E);
2153 2071 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2154 2072 }
2155 2073 %}
2156 2074
2157 2075
2158 2076 // Encode a reg-reg copy. If it is useless, then empty encoding.
2159 2077 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2160 2078 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2161 2079 %}
2162 2080
2163 2081 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2164 2082 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2165 2083 %}
2166 2084
2167 2085 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
2168 2086 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2169 2087 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2170 2088 %}
2171 2089
2172 2090 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2173 2091 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2174 2092 %}
2175 2093
2176 2094 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2177 2095 $$$emit8$primary;
2178 2096 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2179 2097 %}
2180 2098
2181 2099 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2182 2100 $$$emit8$secondary;
2183 2101 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2184 2102 %}
2185 2103
2186 2104 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2187 2105 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2188 2106 %}
2189 2107
2190 2108 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2191 2109 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2192 2110 %}
2193 2111
2194 2112 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2195 2113 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2196 2114 %}
2197 2115
2198 2116 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2199 2117 // Output immediate
2200 2118 $$$emit32$src$$constant;
2201 2119 %}
2202 2120
2203 2121 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2204 2122 // Output Float immediate bits
2205 2123 jfloat jf = $src$$constant;
2206 2124 int jf_as_bits = jint_cast( jf );
2207 2125 emit_d32(cbuf, jf_as_bits);
2208 2126 %}
2209 2127
2210 2128 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2211 2129 // Output Float immediate bits
2212 2130 jfloat jf = $src$$constant;
2213 2131 int jf_as_bits = jint_cast( jf );
2214 2132 emit_d32(cbuf, jf_as_bits);
2215 2133 %}
2216 2134
2217 2135 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2218 2136 // Output immediate
2219 2137 $$$emit16$src$$constant;
2220 2138 %}
2221 2139
2222 2140 enc_class Con_d32(immI src) %{
2223 2141 emit_d32(cbuf,$src$$constant);
2224 2142 %}
2225 2143
2226 2144 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
2227 2145 // Output immediate memory reference
2228 2146 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2229 2147 emit_d32(cbuf, 0x00);
2230 2148 %}
2231 2149
2232 2150 enc_class lock_prefix( ) %{
2233 2151 if( os::is_MP() )
2234 2152 emit_opcode(cbuf,0xF0); // [Lock]
2235 2153 %}
2236 2154
2237 2155 // Cmp-xchg long value.
2238 2156 // Note: we need to swap rbx, and rcx before and after the
2239 2157 // cmpxchg8 instruction because the instruction uses
2240 2158 // rcx as the high order word of the new value to store but
2241 2159 // our register encoding uses rbx,.
2242 2160 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2243 2161
2244 2162 // XCHG rbx,ecx
2245 2163 emit_opcode(cbuf,0x87);
2246 2164 emit_opcode(cbuf,0xD9);
2247 2165 // [Lock]
2248 2166 if( os::is_MP() )
2249 2167 emit_opcode(cbuf,0xF0);
2250 2168 // CMPXCHG8 [Eptr]
2251 2169 emit_opcode(cbuf,0x0F);
2252 2170 emit_opcode(cbuf,0xC7);
2253 2171 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2254 2172 // XCHG rbx,ecx
2255 2173 emit_opcode(cbuf,0x87);
2256 2174 emit_opcode(cbuf,0xD9);
2257 2175 %}
2258 2176
// 32-bit compare-and-exchange at [mem_ptr]: (LOCK) CMPXCHG, 0F B1 /r.
2259 2177 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2260 2178 // [Lock]
2261 2179 if( os::is_MP() )
2262 2180 emit_opcode(cbuf,0xF0);
2263 2181
2264 2182 // CMPXCHG [Eptr]
2265 2183 emit_opcode(cbuf,0x0F);
2266 2184 emit_opcode(cbuf,0xB1);
2267 2185 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2268 2186 %}
2269 2187
// Turn the equal-flag into a boolean in 'res': res = 1 when ZF is set
// (e.g. CMPXCHG succeeded), 0 when not — the JNE (0x75) skips the 5-byte
// "MOV res,1" so a not-equal result leaves the earlier "MOV res,0" in place.
2270 2188 enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2271 2189 int res_encoding = $res$$reg;
2272 2190
2273 2191 // MOV res,0
2274 2192 emit_opcode( cbuf, 0xB8 + res_encoding);
2275 2193 emit_d32( cbuf, 0 );
2276 2194 // JNE,s fail
2277 2195 emit_opcode(cbuf,0x75);
2278 2196 emit_d8(cbuf, 5 );
2279 2197 // MOV res,1
2280 2198 emit_opcode( cbuf, 0xB8 + res_encoding);
2281 2199 emit_d32( cbuf, 1 );
2282 2200 // fail:
2283 2201 %}
2284 2202
// Record the current position so memory-operand relocations can find the
// start of the instruction being emitted.
2285 2203 enc_class set_instruction_start( ) %{
2286 2204 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2287 2205 %}
2288 2206
// Emit the ModRM/SIB/displacement for a register + memory operand pair.
2289 2207 enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
2290 2208 int reg_encoding = $ereg$$reg;
2291 2209 int base = $mem$$base;
2292 2210 int index = $mem$$index;
2293 2211 int scale = $mem$$scale;
2294 2212 int displace = $mem$$disp;
2295 2213 bool disp_is_oop = $mem->disp_is_oop();
2296 2214 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2297 2215 %}
2298 2216
// Same as RegMem but addresses the HIGH word of a long: uses the high
// register of the pair and biases the displacement by 4 bytes.
2299 2217 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
2300 2218 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
2301 2219 int base = $mem$$base;
2302 2220 int index = $mem$$index;
2303 2221 int scale = $mem$$scale;
2304 2222 int displace = $mem$$disp + 4; // Offset is 4 further in memory
2305 2223 assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
2306 2224 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
2307 2225 %}
2308 2226
// Long shift by 1..31: a double-shift (SHLD/SHRD, chosen via $tertiary)
// moves bits across the register-pair boundary, then the remaining half is
// shifted by the same count with the $primary/$secondary opcode.
2309 2227 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2310 2228 int r1, r2;
2311 2229 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2312 2230 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2313 2231 emit_opcode(cbuf,0x0F);
2314 2232 emit_opcode(cbuf,$tertiary);
2315 2233 emit_rm(cbuf, 0x3, r1, r2);
2316 2234 emit_d8(cbuf,$cnt$$constant);
2317 2235 emit_d8(cbuf,$primary);
2318 2236 emit_rm(cbuf, 0x3, $secondary, r1);
2319 2237 emit_d8(cbuf,$cnt$$constant);
2320 2238 %}
2321 2239
// Arithmetic long shift by 32..63: copy hi into lo, shift lo by cnt-32,
// then SAR the high half by 31 to replicate the sign bit.
2322 2240 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2323 2241 emit_opcode( cbuf, 0x8B ); // Move
2324 2242 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2325 2243 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2326 2244 emit_d8(cbuf,$primary);
2327 2245 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2328 2246 emit_d8(cbuf,$cnt$$constant-32);
2329 2247 }
2330 2248 emit_d8(cbuf,$primary);
2331 2249 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2332 2250 emit_d8(cbuf,31);
2333 2251 %}
2334 2252
// Logical long shift by 32..63: move one half into the other (direction
// chosen from $secondary), shift the survivor by cnt-32, clear the rest.
2335 2253 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2336 2254 int r1, r2;
2337 2255 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2338 2256 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2339 2257
2340 2258 emit_opcode( cbuf, 0x8B ); // Move r1,r2
2341 2259 emit_rm(cbuf, 0x3, r1, r2);
2342 2260 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2343 2261 emit_opcode(cbuf,$primary);
2344 2262 emit_rm(cbuf, 0x3, $secondary, r1);
2345 2263 emit_d8(cbuf,$cnt$$constant-32);
2346 2264 }
2347 2265 emit_opcode(cbuf,0x33); // XOR r2,r2
2348 2266 emit_rm(cbuf, 0x3, r2, r2);
2349 2267 %}
2350 2268
2351 2269 // Clone of RegMem but accepts an extra parameter to access each
2352 2270 // half of a double in memory; it never needs relocation info.
2353 2271 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2354 2272 emit_opcode(cbuf,$opcode$$constant);
2355 2273 int reg_encoding = $rm_reg$$reg;
2356 2274 int base = $mem$$base;
2357 2275 int index = $mem$$index;
2358 2276 int scale = $mem$$scale;
2359 2277 int displace = $mem$$disp + $disp_for_half$$constant;
2360 2278 bool disp_is_oop = false;
2361 2279 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2362 2280 %}
2363 2281
2364 2282 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2365 2283 //
2366 2284 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2367 2285 // and it never needs relocation information.
2368 2286 // Frequently used to move data between FPU's Stack Top and memory.
2369 2287 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2370 2288 int rm_byte_opcode = $rm_opcode$$constant;
2371 2289 int base = $mem$$base;
2372 2290 int index = $mem$$index;
2373 2291 int scale = $mem$$scale;
2374 2292 int displace = $mem$$disp;
2375 2293 assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2376 2294 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
2377 2295 %}
2378 2296
// Like RMopc_Mem_no_oop, but the displacement MAY be an oop and then
// carries relocation info (static-global accesses).
2379 2297 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2380 2298 int rm_byte_opcode = $rm_opcode$$constant;
2381 2299 int base = $mem$$base;
2382 2300 int index = $mem$$index;
2383 2301 int scale = $mem$$scale;
2384 2302 int displace = $mem$$disp;
2385 2303 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2386 2304 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
2387 2305 %}
2388 2306
// Operands for LEA dst,[src0+src1]: no index, no scale, immediate disp.
2389 2307 enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
2390 2308 int reg_encoding = $dst$$reg;
2391 2309 int base = $src0$$reg; // 0xFFFFFFFF indicates no base
2392 2310 int index = 0x04; // 0x04 indicates no index
2393 2311 int scale = 0x00; // 0x00 indicates no scale
2394 2312 int displace = $src1$$constant; // 0x00 indicates no displacement
2395 2313 bool disp_is_oop = false;
2396 2314 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2397 2315 %}
2398 2316
// dst = min(dst, src): compare, and conditionally skip the 2-byte MOV.
2399 2317 enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
2400 2318 // Compare dst,src
2401 2319 emit_opcode(cbuf,0x3B);
2402 2320 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2403 2321 // jmp dst < src around move
2404 2322 emit_opcode(cbuf,0x7C);
2405 2323 emit_d8(cbuf,2);
2406 2324 // move dst,src
2407 2325 emit_opcode(cbuf,0x8B);
2408 2326 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2409 2327 %}
2410 2328
// dst = max(dst, src): same shape as min_enc but jumps when dst > src (JG).
2411 2329 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
2412 2330 // Compare dst,src
2413 2331 emit_opcode(cbuf,0x3B);
2414 2332 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2415 2333 // jmp dst > src around move
2416 2334 emit_opcode(cbuf,0x7F);
2417 2335 emit_d8(cbuf,2);
2418 2336 // move dst,src
2419 2337 emit_opcode(cbuf,0x8B);
2420 2338 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2421 2339 %}
2422 2340
2423 2341 enc_class enc_FP_store(memory mem, regD src) %{
2424 2342 // If src is FPR1, we can just FST to store it.
2425 2343 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2426 2344 int reg_encoding = 0x2; // Just store
2427 2345 int base = $mem$$base;
2428 2346 int index = $mem$$index;
2429 2347 int scale = $mem$$scale;
2430 2348 int displace = $mem$$disp;
2431 2349 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2432 2350 if( $src$$reg != FPR1L_enc ) {
2433 2351 reg_encoding = 0x3; // Store & pop
2434 2352 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2435 2353 emit_d8( cbuf, 0xC0-1+$src$$reg );
2436 2354 }
2437 2355 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2438 2356 emit_opcode(cbuf,$primary);
2439 2357 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2440 2358 %}
2441 2359
// Two's-complement negate: F7 /3.
2442 2360 enc_class neg_reg(eRegI dst) %{
2443 2361 // NEG $dst
2444 2362 emit_opcode(cbuf,0xF7);
2445 2363 emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2446 2364 %}
2447 2365
// SETL: set dst byte to 1 if the less-than condition holds (0F 9C).
2448 2366 enc_class setLT_reg(eCXRegI dst) %{
2449 2367 // SETLT $dst
2450 2368 emit_opcode(cbuf,0x0F);
2451 2369 emit_opcode(cbuf,0x9C);
2452 2370 emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2453 2371 %}
2454 2372
// Branchless conditional add: p -= q; tmp = borrow mask (SBB tmp,tmp);
// p += (tmp & y), i.e. add y only when p < q.
2455 2373 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
2456 2374 int tmpReg = $tmp$$reg;
2457 2375
2458 2376 // SUB $p,$q
2459 2377 emit_opcode(cbuf,0x2B);
2460 2378 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2461 2379 // SBB $tmp,$tmp
2462 2380 emit_opcode(cbuf,0x1B);
2463 2381 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2464 2382 // AND $tmp,$y
2465 2383 emit_opcode(cbuf,0x23);
2466 2384 emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2467 2385 // ADD $p,$tmp
2468 2386 emit_opcode(cbuf,0x03);
2469 2387 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2470 2388 %}
2471 2389
// Same branchless conditional add, but the AND mask comes from memory.
2472 2390 enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
2473 2391 int tmpReg = $tmp$$reg;
2474 2392
2475 2393 // SUB $p,$q
2476 2394 emit_opcode(cbuf,0x2B);
2477 2395 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2478 2396 // SBB $tmp,$tmp
2479 2397 emit_opcode(cbuf,0x1B);
2480 2398 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2481 2399 // AND $tmp,$y
2482 2400 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand
2483 2401 emit_opcode(cbuf,0x23);
2484 2402 int reg_encoding = tmpReg;
2485 2403 int base = $mem$$base;
2486 2404 int index = $mem$$index;
2487 2405 int scale = $mem$$scale;
2488 2406 int displace = $mem$$disp;
2489 2407 bool disp_is_oop = $mem->disp_is_oop();
2490 2408 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2491 2409 // ADD $p,$tmp
2492 2410 emit_opcode(cbuf,0x03);
2493 2411 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2494 2412 %}
2495 2413
// Variable left shift of a long in a register pair: if shift >= 32, move
// lo into hi and clear lo first; then SHLD/SHL by CL handle the remainder.
2496 2414 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2497 2415 // TEST shift,32
2498 2416 emit_opcode(cbuf,0xF7);
2499 2417 emit_rm(cbuf, 0x3, 0, ECX_enc);
2500 2418 emit_d32(cbuf,0x20);
2501 2419 // JEQ,s small
2502 2420 emit_opcode(cbuf, 0x74);
2503 2421 emit_d8(cbuf, 0x04);
2504 2422 // MOV $dst.hi,$dst.lo
2505 2423 emit_opcode( cbuf, 0x8B );
2506 2424 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2507 2425 // CLR $dst.lo
2508 2426 emit_opcode(cbuf, 0x33);
2509 2427 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2510 2428 // small:
2511 2429 // SHLD $dst.hi,$dst.lo,$shift
2512 2430 emit_opcode(cbuf,0x0F);
2513 2431 emit_opcode(cbuf,0xA5);
2514 2432 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2515 2433 // SHL $dst.lo,$shift
2516 2434 emit_opcode(cbuf,0xD3);
2517 2435 emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2518 2436 %}
2519 2437
// Variable logical right shift of a long: mirror image of shift_left_long
// using SHRD/SHR and clearing the high half for shifts >= 32.
2520 2438 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2521 2439 // TEST shift,32
2522 2440 emit_opcode(cbuf,0xF7);
2523 2441 emit_rm(cbuf, 0x3, 0, ECX_enc);
2524 2442 emit_d32(cbuf,0x20);
2525 2443 // JEQ,s small
2526 2444 emit_opcode(cbuf, 0x74);
2527 2445 emit_d8(cbuf, 0x04);
2528 2446 // MOV $dst.lo,$dst.hi
2529 2447 emit_opcode( cbuf, 0x8B );
2530 2448 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2531 2449 // CLR $dst.hi
2532 2450 emit_opcode(cbuf, 0x33);
2533 2451 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2534 2452 // small:
2535 2453 // SHRD $dst.lo,$dst.hi,$shift
2536 2454 emit_opcode(cbuf,0x0F);
2537 2455 emit_opcode(cbuf,0xAD);
2538 2456 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2539 2457 // SHR $dst.hi,$shift
2540 2458 emit_opcode(cbuf,0xD3);
2541 2459 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2542 2460 %}
2543 2461
// Variable arithmetic right shift of a long: like shift_right_long but the
// high half is sign-filled with SAR hi,31 on the >=32 path, and SAR by CL.
2544 2462 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2545 2463 // TEST shift,32
2546 2464 emit_opcode(cbuf,0xF7);
2547 2465 emit_rm(cbuf, 0x3, 0, ECX_enc);
2548 2466 emit_d32(cbuf,0x20);
2549 2467 // JEQ,s small
2550 2468 emit_opcode(cbuf, 0x74);
2551 2469 emit_d8(cbuf, 0x05);
2552 2470 // MOV $dst.lo,$dst.hi
2553 2471 emit_opcode( cbuf, 0x8B );
2554 2472 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2555 2473 // SAR $dst.hi,31
2556 2474 emit_opcode(cbuf, 0xC1);
2557 2475 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2558 2476 emit_d8(cbuf, 0x1F );
2559 2477 // small:
2560 2478 // SHRD $dst.lo,$dst.hi,$shift
2561 2479 emit_opcode(cbuf,0x0F);
2562 2480 emit_opcode(cbuf,0xAD);
2563 2481 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2564 2482 // SAR $dst.hi,$shift
2565 2483 emit_opcode(cbuf,0xD3);
2566 2484 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2567 2485 %}
2568 2486
2569 2487
2570 2488 // ----------------- Encodings for floating point unit -----------------
2571 2489 // May leave result in FPU-TOS or FPU reg depending on opcodes
2572 2490 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
2573 2491 $$$emit8$primary;
2574 2492 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2575 2493 %}
2576 2494
2577 2495 // Pop argument in FPR0 with FSTP ST(0)
2578 2496 enc_class PopFPU() %{
2579 2497 emit_opcode( cbuf, 0xDD );
2580 2498 emit_d8( cbuf, 0xD8 );
2581 2499 %}
2582 2500
2583 2501 // !!!!! equivalent to Pop_Reg_F
2584 2502 enc_class Pop_Reg_D( regD dst ) %{
2585 2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2586 2504 emit_d8( cbuf, 0xD8+$dst$$reg );
2587 2505 %}
2588 2506
// FLD ST(i-1): push a copy of FPU register dst onto the FP stack.
2589 2507 enc_class Push_Reg_D( regD dst ) %{
2590 2508 emit_opcode( cbuf, 0xD9 );
2591 2509 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2592 2510 %}
2593 2511
// Strict-FP scaling: multiply dst by a subnormal-bias constant loaded as an
// 80-bit real from a StubRoutines address.
2594 2512 enc_class strictfp_bias1( regD dst ) %{
2595 2513 emit_opcode( cbuf, 0xDB ); // FLD m80real
2596 2514 emit_opcode( cbuf, 0x2D );
2597 2515 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2598 2516 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2599 2517 emit_opcode( cbuf, 0xC8+$dst$$reg );
2600 2518 %}
2601 2519
// Inverse bias of strictfp_bias1, using the second subnormal-bias constant.
2602 2520 enc_class strictfp_bias2( regD dst ) %{
2603 2521 emit_opcode( cbuf, 0xDB ); // FLD m80real
2604 2522 emit_opcode( cbuf, 0x2D );
2605 2523 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2606 2524 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2607 2525 emit_opcode( cbuf, 0xC8+$dst$$reg );
2608 2526 %}
2609 2527
2610 2528 // Special case for moving an integer register to a stack slot.
2611 2529 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2612 2530 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2613 2531 %}
2614 2532
2615 2533 // Special case for moving a register to a stack slot.
2616 2534 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2617 2535 // Opcode already emitted
2618 2536 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
2619 2537 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
2620 2538 emit_d32(cbuf, $dst$$disp); // Displacement
2621 2539 %}
2622 2540
2623 2541 // Push the integer in stackSlot 'src' onto FP-stack
2624 2542 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2625 2543 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2626 2544 %}
2627 2545
2628 2546 // Push the float in stackSlot 'src' onto FP-stack
2629 2547 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2630 2548 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2631 2549 %}
2632 2550
2633 2551 // Push the double in stackSlot 'src' onto FP-stack
2634 2552 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2635 2553 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2636 2554 %}
2637 2555
2638 2556 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2639 2557 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2640 2558 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2641 2559 %}
2642 2560
2643 2561 // Same as Pop_Mem_F except for opcode
2644 2562 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2645 2563 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2646 2564 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2647 2565 %}
2648 2566
// FSTP ST(i): store TOS into FPU register dst and pop the FP stack.
2649 2567 enc_class Pop_Reg_F( regF dst ) %{
2650 2568 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2651 2569 emit_d8( cbuf, 0xD8+$dst$$reg );
2652 2570 %}
2653 2571
// FLD ST(i-1): push a copy of FPU register dst onto the FP stack.
2654 2572 enc_class Push_Reg_F( regF dst ) %{
2655 2573 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2656 2574 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2657 2575 %}
2658 2576
2659 2577 // Push FPU's float to a stack-slot, and pop FPU-stack
// If src is not already at TOS (FPR1), FLD it first and use the popping
// store (FSTP) so the stack depth is unchanged overall.
2660 2578 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
2661 2579 int pop = 0x02;
2662 2580 if ($src$$reg != FPR1L_enc) {
2663 2581 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2664 2582 emit_d8( cbuf, 0xC0-1+$src$$reg );
2665 2583 pop = 0x03;
2666 2584 }
2667 2585 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2668 2586 %}
2669 2587
2670 2588 // Push FPU's double to a stack-slot, and pop FPU-stack
2671 2589 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
2672 2590 int pop = 0x02;
2673 2591 if ($src$$reg != FPR1L_enc) {
2674 2592 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2675 2593 emit_d8( cbuf, 0xC0-1+$src$$reg );
2676 2594 pop = 0x03;
2677 2595 }
2678 2596 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2679 2597 %}
2680 2598
2681 2599 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2682 2600 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
2683 2601 int pop = 0xD0 - 1; // -1 since we skip FLD
2684 2602 if ($src$$reg != FPR1L_enc) {
2685 2603 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2686 2604 emit_d8( cbuf, 0xC0-1+$src$$reg );
2687 2605 pop = 0xD8;
2688 2606 }
2689 2607 emit_opcode( cbuf, 0xDD );
2690 2608 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2691 2609 %}
2692 2610
2693 2611
// dst = src + src1*src2 on the x87 stack, assembled via MacroAssembler.
2694 2612 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
2695 2613 MacroAssembler masm(&cbuf);
2696 2614 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2697 2615 masm.fmul( $src2$$reg+0); // value at TOS
2698 2616 masm.fadd( $src$$reg+0); // value at TOS
2699 2617 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
2700 2618 %}
2701 2619
2702 2620
// Load dst into FPR0 and, if src is not FPR1, rotate it into FPR1 with the
// fincstp / FXCH / fdecstp trick so a following fprem sees dst over src.
2703 2621 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2704 2622 // load dst in FPR0
2705 2623 emit_opcode( cbuf, 0xD9 );
2706 2624 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2707 2625 if ($src$$reg != FPR1L_enc) {
2708 2626 // fincstp
2709 2627 emit_opcode (cbuf, 0xD9);
2710 2628 emit_opcode (cbuf, 0xF7);
2711 2629 // swap src with FPR1:
2712 2630 // FXCH FPR1 with src
2713 2631 emit_opcode(cbuf, 0xD9);
2714 2632 emit_d8(cbuf, 0xC8-1+$src$$reg );
2715 2633 // fdecstp
2716 2634 emit_opcode (cbuf, 0xD9);
2717 2635 emit_opcode (cbuf, 0xF6);
2718 2636 }
2719 2637 %}
2720 2638
// Move two XMM doubles onto the x87 stack (for fprem-style ops) by
// spilling each through an 8-byte scratch slot on the C stack.
2721 2639 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
2722 2640 // Allocate a word
2723 2641 emit_opcode(cbuf,0x83); // SUB ESP,8
2724 2642 emit_opcode(cbuf,0xEC);
2725 2643 emit_d8(cbuf,0x08);
2726 2644
2727 2645 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
2728 2646 emit_opcode (cbuf, 0x0F );
2729 2647 emit_opcode (cbuf, 0x11 );
2730 2648 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2731 2649
2732 2650 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2733 2651 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2734 2652
2735 2653 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
2736 2654 emit_opcode (cbuf, 0x0F );
2737 2655 emit_opcode (cbuf, 0x11 );
2738 2656 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2739 2657
2740 2658 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2741 2659 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2742 2660
2743 2661 %}
2744 2662
// Single-precision variant of Push_ModD_encoding using a 4-byte slot.
2745 2663 enc_class Push_ModX_encoding( regX src0, regX src1) %{
2746 2664 // Allocate a word
2747 2665 emit_opcode(cbuf,0x83); // SUB ESP,4
2748 2666 emit_opcode(cbuf,0xEC);
2749 2667 emit_d8(cbuf,0x04);
2750 2668
2751 2669 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
2752 2670 emit_opcode (cbuf, 0x0F );
2753 2671 emit_opcode (cbuf, 0x11 );
2754 2672 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2755 2673
2756 2674 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2757 2675 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2758 2676
2759 2677 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2760 2678 emit_opcode (cbuf, 0x0F );
2761 2679 emit_opcode (cbuf, 0x11 );
2762 2680 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2763 2681
2764 2682 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2765 2683 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2766 2684
2767 2685 %}
2768 2686
// Move the x87 TOS double into an XMM register through the scratch slot,
// then release the 8 bytes reserved earlier.
2769 2687 enc_class Push_ResultXD(regXD dst) %{
2770 2688 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2771 2689
2772 2690 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2773 2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2774 2692 emit_opcode (cbuf, 0x0F );
2775 2693 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2776 2694 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2777 2695
2778 2696 emit_opcode(cbuf,0x83); // ADD ESP,8
2779 2697 emit_opcode(cbuf,0xC4);
2780 2698 emit_d8(cbuf,0x08);
2781 2699 %}
2782 2700
// Float variant: FSTP_S to the slot, MOVSS into dst, then pop d8 bytes.
2783 2701 enc_class Push_ResultX(regX dst, immI d8) %{
2784 2702 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2785 2703
2786 2704 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2787 2705 emit_opcode (cbuf, 0x0F );
2788 2706 emit_opcode (cbuf, 0x10 );
2789 2707 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2790 2708
2791 2709 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2792 2710 emit_opcode(cbuf,0xC4);
2793 2711 emit_d8(cbuf,$d8$$constant);
2794 2712 %}
2795 2713
// Spill one XMM double to a fresh 8-byte slot and load it onto the x87 stack.
2796 2714 enc_class Push_SrcXD(regXD src) %{
2797 2715 // Allocate a word
2798 2716 emit_opcode(cbuf,0x83); // SUB ESP,8
2799 2717 emit_opcode(cbuf,0xEC);
2800 2718 emit_d8(cbuf,0x08);
2801 2719
2802 2720 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2803 2721 emit_opcode (cbuf, 0x0F );
2804 2722 emit_opcode (cbuf, 0x11 );
2805 2723 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2806 2724
2807 2725 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2808 2726 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2809 2727 %}
2810 2728
// Reserve an 8-byte scratch area on the C stack (SUB ESP,8).
2811 2729 enc_class push_stack_temp_qword() %{
2812 2730 emit_opcode(cbuf,0x83); // SUB ESP,8
2813 2731 emit_opcode(cbuf,0xEC);
2814 2732 emit_d8 (cbuf,0x08);
2815 2733 %}
2816 2734
// Release the 8-byte scratch area (ADD ESP,8).
2817 2735 enc_class pop_stack_temp_qword() %{
2818 2736 emit_opcode(cbuf,0x83); // ADD ESP,8
2819 2737 emit_opcode(cbuf,0xC4);
2820 2738 emit_d8 (cbuf,0x08);
2821 2739 %}
2822 2740
// Copy an XMM double through [ESP] onto the x87 stack (slot assumed
// already allocated, e.g. by push_stack_temp_qword).
2823 2741 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
2824 2742 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
2825 2743 emit_opcode (cbuf, 0x0F );
2826 2744 emit_opcode (cbuf, 0x11 );
2827 2745 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2828 2746
2829 2747 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2830 2748 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2831 2749 %}
2832 2750
2833 2751 // Compute X^Y using Intel's fast hardware instructions, if possible.
2834 2752 // Otherwise return a NaN.
// Splits Q = Y*log2(X) into int(Q) and frac(Q): f2xm1 handles 2^frac(Q),
// while int(Q) is turned into 2^int(Q) by building the double's exponent
// field with integer ops in EAX and multiplying it in.
2835 2753 enc_class pow_exp_core_encoding %{
2836 2754 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2837 2755 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2838 2756 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2839 2757 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2840 2758 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2841 2759 emit_opcode(cbuf,0x1C);
2842 2760 emit_d8(cbuf,0x24);
2843 2761 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2844 2762 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2845 2763 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2846 2764 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2847 2765 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2848 2766 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2849 2767 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2850 2768 emit_d32(cbuf,0xFFFFF800);
2851 2769 emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias
2852 2770 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2853 2771 emit_d32(cbuf,1023);
2854 2772 emit_opcode(cbuf,0x8B); // mov rbx,eax
2855 2773 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2856 2774 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
2857 2775 emit_rm(cbuf,0x3,0x4,EAX_enc);
2858 2776 emit_d8(cbuf,20);
2859 2777 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
2860 2778 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2861 2779 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
2862 2780 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2863 2781 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
2864 2782 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2865 2783 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2866 2784 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2867 2785 emit_d32(cbuf,0);
2868 2786 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2869 2787 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2870 2788 %}
2871 2789
2872 2790 // enc_class Pop_Reg_Mod_D( regD dst, regD src)
2873 2791 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2874 2792
// If src is not FPR1, rotate the fprem result back out of FPR1 using the
// fincstp / FXCH / fdecstp trick; the actual pop is done by a follow-on
// Pop_Reg_F / Pop_Mem_F encoding.
2875 2793 enc_class Push_Result_Mod_D( regD src) %{
2876 2794 if ($src$$reg != FPR1L_enc) {
2877 2795 // fincstp
2878 2796 emit_opcode (cbuf, 0xD9);
2879 2797 emit_opcode (cbuf, 0xF7);
2880 2798 // FXCH FPR1 with src
2881 2799 emit_opcode(cbuf, 0xD9);
2882 2800 emit_d8(cbuf, 0xC8-1+$src$$reg );
2883 2801 // fdecstp
2884 2802 emit_opcode (cbuf, 0xD9);
2885 2803 emit_opcode (cbuf, 0xF6);
2886 2804 }
2887 2805 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2888 2806 // // FSTP FPR$dst$$reg
2889 2807 // emit_opcode( cbuf, 0xDD );
2890 2808 // emit_d8( cbuf, 0xD8+$dst$$reg );
2891 2809 %}
2892 2810
// Copy FPU status into EFLAGS and skip the next 5 bytes when the parity
// flag is clear (i.e. the compare was ordered / no NaN involved).
2893 2811 enc_class fnstsw_sahf_skip_parity() %{
2894 2812 // fnstsw ax
2895 2813 emit_opcode( cbuf, 0xDF );
2896 2814 emit_opcode( cbuf, 0xE0 );
2897 2815 // sahf
2898 2816 emit_opcode( cbuf, 0x9E );
2899 2817 // jnp ::skip
2900 2818 emit_opcode( cbuf, 0x7B );
2901 2819 emit_opcode( cbuf, 0x05 );
2902 2820 %}
2903 2821
// FPREM loop: the instruction may produce only a partial remainder, so
// repeat until the FPU's C2 bit (surfaced via parity after SAHF) clears.
2904 2822 enc_class emitModD() %{
2905 2823 // fprem must be iterative
2906 2824 // :: loop
2907 2825 // fprem
2908 2826 emit_opcode( cbuf, 0xD9 );
2909 2827 emit_opcode( cbuf, 0xF8 );
2910 2828 // wait
2911 2829 emit_opcode( cbuf, 0x9b );
2912 2830 // fnstsw ax
2913 2831 emit_opcode( cbuf, 0xDF );
2914 2832 emit_opcode( cbuf, 0xE0 );
2915 2833 // sahf
2916 2834 emit_opcode( cbuf, 0x9E );
2917 2835 // jp ::loop
2918 2836 emit_opcode( cbuf, 0x0F );
2919 2837 emit_opcode( cbuf, 0x8A );
2920 2838 emit_opcode( cbuf, 0xF4 );
2921 2839 emit_opcode( cbuf, 0xFF );
2922 2840 emit_opcode( cbuf, 0xFF );
2923 2841 emit_opcode( cbuf, 0xFF );
2924 2842 %}
2925 2843
// Materialize FPU compare results into EFLAGS, folding an unordered
// (NaN) outcome into the "less than" case by forcing the carry flag.
2926 2844 enc_class fpu_flags() %{
2927 2845 // fnstsw_ax
2928 2846 emit_opcode( cbuf, 0xDF);
2929 2847 emit_opcode( cbuf, 0xE0);
2930 2848 // test ax,0x0400
2931 2849 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate
2932 2850 emit_opcode( cbuf, 0xA9 );
2933 2851 emit_d16 ( cbuf, 0x0400 );
2934 2852 // // // This sequence works, but stalls for 12-16 cycles on PPro
2935 2853 // // test rax,0x0400
2936 2854 // emit_opcode( cbuf, 0xA9 );
2937 2855 // emit_d32 ( cbuf, 0x00000400 );
2938 2856 //
2939 2857 // jz exit (no unordered comparison)
2940 2858 emit_opcode( cbuf, 0x74 );
2941 2859 emit_d8 ( cbuf, 0x02 );
2942 2860 // mov ah,1 - treat as LT case (set carry flag)
2943 2861 emit_opcode( cbuf, 0xB4 );
2944 2862 emit_d8 ( cbuf, 0x01 );
2945 2863 // sahf
2946 2864 emit_opcode( cbuf, 0x9E);
2947 2865 %}
2948 2866
// P6 fixup after an XMM/FCOMI compare: when the parity flag signals an
// unordered (NaN) result, rewrite EFLAGS to look like "less than".
2949 2867 enc_class cmpF_P6_fixup() %{
2950 2868 // Fixup the integer flags in case comparison involved a NaN
2951 2869 //
2952 2870 // JNP exit (no unordered comparison, P-flag is set by NaN)
2953 2871 emit_opcode( cbuf, 0x7B );
2954 2872 emit_d8 ( cbuf, 0x03 );
2955 2873 // MOV AH,1 - treat as LT case (set carry flag)
2956 2874 emit_opcode( cbuf, 0xB4 );
2957 2875 emit_d8 ( cbuf, 0x01 );
2958 2876 // SAHF
2959 2877 emit_opcode( cbuf, 0x9E);
2960 2878 // NOP // target for branch to avoid branch to branch
2961 2879 emit_opcode( cbuf, 0x90);
2962 2880 %}
2963 2881
2964 2882 // fnstsw_ax();
2965 2883 // sahf();
2966 2884 // movl(dst, nan_result);
2967 2885 // jcc(Assembler::parity, exit);
2968 2886 // movl(dst, less_result);
2969 2887 // jcc(Assembler::below, exit);
2970 2888 // movl(dst, equal_result);
2971 2889 // jcc(Assembler::equal, exit);
2972 2890 // movl(dst, greater_result);
2973 2891
2974 2892 // less_result = 1;
2975 2893 // greater_result = -1;
2976 2894 // equal_result = 0;
2977 2895 // nan_result = -1;
2978 2896
// Convert an x87 compare to a three-valued int in dst:
// -1 (less or unordered/NaN), 0 (equal), +1 (greater); see the
// pseudo-code comments above for the jump structure.
2979 2897 enc_class CmpF_Result(eRegI dst) %{
2980 2898 // fnstsw_ax();
2981 2899 emit_opcode( cbuf, 0xDF);
2982 2900 emit_opcode( cbuf, 0xE0);
2983 2901 // sahf
2984 2902 emit_opcode( cbuf, 0x9E);
2985 2903 // movl(dst, nan_result);
2986 2904 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2987 2905 emit_d32( cbuf, -1 );
2988 2906 // jcc(Assembler::parity, exit);
2989 2907 emit_opcode( cbuf, 0x7A );
2990 2908 emit_d8 ( cbuf, 0x13 );
2991 2909 // movl(dst, less_result);
2992 2910 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2993 2911 emit_d32( cbuf, -1 );
2994 2912 // jcc(Assembler::below, exit);
2995 2913 emit_opcode( cbuf, 0x72 );
2996 2914 emit_d8 ( cbuf, 0x0C );
2997 2915 // movl(dst, equal_result);
2998 2916 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2999 2917 emit_d32( cbuf, 0 );
3000 2918 // jcc(Assembler::equal, exit);
3001 2919 emit_opcode( cbuf, 0x74 );
3002 2920 emit_d8 ( cbuf, 0x05 );
3003 2921 // movl(dst, greater_result);
3004 2922 emit_opcode( cbuf, 0xB8 + $dst$$reg);
3005 2923 emit_d32( cbuf, 1 );
3006 2924 %}
3007 2925
3008 2926
3009 2927 // XMM version of CmpF_Result. Because the XMM compare
3010 2928 // instructions set the EFLAGS directly. It becomes simpler than
3011 2929 // the float version above.
// Assumes dst pre-holds 0; decrement for less/NaN, increment for greater.
3012 2930 enc_class CmpX_Result(eRegI dst) %{
3013 2931 MacroAssembler _masm(&cbuf);
3014 2932 Label nan, inc, done;
3015 2933
3016 2934 __ jccb(Assembler::parity, nan);
3017 2935 __ jccb(Assembler::equal, done);
3018 2936 __ jccb(Assembler::above, inc);
3019 2937 __ bind(nan);
3020 2938 __ decrement(as_Register($dst$$reg)); // NO L qqq
3021 2939 __ jmpb(done);
3022 2940 __ bind(inc);
3023 2941 __ increment(as_Register($dst$$reg)); // NO L qqq
3024 2942 __ bind(done);
3025 2943 %}
3026 2944
3027 2945 // Compare the longs and set flags
3028 2946 // BROKEN! Do Not use as-is
// Broken because a hi-word inequality leaves flags from the hi compare,
// which is not a correct signed/unsigned 64-bit comparison for all cases.
3029 2947 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
3030 2948 // CMP $src1.hi,$src2.hi
3031 2949 emit_opcode( cbuf, 0x3B );
3032 2950 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
3033 2951 // JNE,s done
3034 2952 emit_opcode(cbuf,0x75);
3035 2953 emit_d8(cbuf, 2 );
3036 2954 // CMP $src1.lo,$src2.lo
3037 2955 emit_opcode( cbuf, 0x3B );
3038 2956 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3039 2957 // done:
3040 2958 %}
3041 2959
// Sign-extend an int into a long register pair: copy src to both halves,
// then arithmetic-shift the high half right by 31.
3042 2960 enc_class convert_int_long( regL dst, eRegI src ) %{
3043 2961 // mov $dst.lo,$src
3044 2962 int dst_encoding = $dst$$reg;
3045 2963 int src_encoding = $src$$reg;
3046 2964 encode_Copy( cbuf, dst_encoding , src_encoding );
3047 2965 // mov $dst.hi,$src
3048 2966 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
3049 2967 // sar $dst.hi,31
3050 2968 emit_opcode( cbuf, 0xC1 );
3051 2969 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
3052 2970 emit_d8(cbuf, 0x1F );
3053 2971 %}
3054 2972
// Long -> x87 double: push both halves, FILD the 64-bit value at [SP],
// then restore the stack pointer.
3055 2973 enc_class convert_long_double( eRegL src ) %{
3056 2974 // push $src.hi
3057 2975 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3058 2976 // push $src.lo
3059 2977 emit_opcode(cbuf, 0x50+$src$$reg );
3060 2978 // fild 64-bits at [SP]
3061 2979 emit_opcode(cbuf,0xdf);
3062 2980 emit_d8(cbuf, 0x6C);
3063 2981 emit_d8(cbuf, 0x24);
3064 2982 emit_d8(cbuf, 0x00);
3065 2983 // pop stack
3066 2984 emit_opcode(cbuf, 0x83); // add SP, #8
3067 2985 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3068 2986 emit_d8(cbuf, 0x8);
3069 2987 %}
3070 2988
3071 2989 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
3072 2990 // IMUL EDX:EAX,$src1
3073 2991 emit_opcode( cbuf, 0xF7 );
3074 2992 emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
3075 2993 // SAR EDX,$cnt-32
3076 2994 int shift_count = ((int)$cnt$$constant) - 32;
3077 2995 if (shift_count > 0) {
3078 2996 emit_opcode(cbuf, 0xC1);
3079 2997 emit_rm(cbuf, 0x3, 7, $dst$$reg );
3080 2998 emit_d8(cbuf, shift_count);
3081 2999 }
3082 3000 %}
3083 3001
  // this version doesn't have add sp, 8
  // Same as convert_long_double, but the two pushed words are deliberately
  // left on the stack; the surrounding encoding is responsible for
  // releasing them.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
3096 3014
  // Signed 32x32->64 widening multiply: EDX:EAX = (long)EAX * (long)$src.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}
3103 3021
  // Unsigned 32x32->64 widening multiply: EDX:EAX = (uint)EAX * (uint)$src.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
3110 3028
  // Full 64x64->64 multiply of $dst (EDX:EAX) by $src, clobbering $tmp.
  // Standard schoolbook decomposition: the cross products x_hi*y_lo and
  // x_lo*y_hi contribute only to the high word; their low halves are
  // summed in $tmp and folded into EDX after the widening MUL of the lows.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (fold the cross-product sum into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
3136 3054
  // Multiply $dst (EDX:EAX) by a small constant $src (fits in 8 bits),
  // clobbering $tmp.  Same decomposition as long_multiply but the
  // multiplier's high word is zero, so only one cross product remains.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,#src  (load the constant multiplier into EDX)
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp  (fold the cross product into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3154 3072
  // 64-bit signed divide via a call into the runtime: push src1 then src2
  // (hi word first, so each long lands little-end-down in memory), CALL
  // SharedRuntime::ldiv, then pop the four argument words.
  // NOTE(review): HIGH_FROM_LOW is applied to the whole (0x50+reg) opcode
  // byte, which is only correct if HIGH_FROM_LOW is a pure additive offset
  // -- matches its use elsewhere in this file, but worth confirming.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3173 3091
  // 64-bit signed remainder via a call into the runtime: identical stack
  // protocol to long_div above, but calls SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3192 3110
  // Test a long against zero: OR the two halves into $tmp so that
  // ZF == 1 iff the full 64-bit value in $src is zero.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3201 3119
  // 64-bit equality compare: CMP the low words, and only if they are equal
  // CMP the high words, so ZF reflects full 64-bit (in)equality.
  // NOTE(review): ordering flags (SF/OF/CF) are taken from whichever CMP
  // ran last and are not meaningful for a signed long compare -- only
  // EQ/NE consumers should use this encoding.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3213 3131
  // Signed 64-bit compare: compute src1 - src2 as CMP(lo) then SBB(hi),
  // discarding the difference but leaving flags set as for a full
  // 64-bit subtraction.  $tmp is clobbered to hold the high-word borrow.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3225 3143
  // Compare a long against zero with ordering: compute 0 - $src via
  // CMP(lo)/SBB(hi) through a zeroed $tmp, leaving flags as for the
  // 64-bit subtraction (the difference itself is discarded).
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3237 3155
  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two-word negate: NEG hi; NEG lo; SBB hi,0.  Negating both halves and
  // then subtracting the borrow from NEG lo out of the high word yields
  // the correct 64-bit two's-complement negation.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3248 3166
  // Load 64 bits from memory into the low quadword of an XMM register.
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3253 3171
  // Store the low quadword of an XMM register to memory.
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3258 3176
  // Replicate the low byte of $src across the low eight bytes of $dst:
  // copy src->dst, PUNPCKLBW dst,dst doubles each of the low bytes,
  // then PSHUFLW with imm 0x00 broadcasts word 0 across the low 64 bits.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3266 3184
  // Broadcast the low 16-bit word of $src across the four low words of
  // $dst (PSHUFLW with shuffle immediate 0x00).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3272 3190
  // Shuffle the four dwords of $src into $dst according to the immediate
  // control byte $mode (PSHUFD).
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3278 3196
  // Bitwise XOR of two 128-bit XMM registers: $dst ^= $src.
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3284 3202
  // Move a 32-bit GPR into the low dword of an XMM register (MOVD;
  // per the ISA the upper lanes of $dst are zeroed).
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3290 3208
3291 3209
3292 3210 // Because the transitions from emitted code to the runtime
3293 3211 // monitorenter/exit helper stubs are so slow it's critical that
3294 3212 // we inline both the stack-locking fast-path and the inflated fast path.
3295 3213 //
3296 3214 // See also: cmpFastLock and cmpFastUnlock.
3297 3215 //
3298 3216 // What follows is a specialized inline transliteration of the code
3299 3217 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3300 3218 // another option would be to emit TrySlowEnter and TrySlowExit methods
3301 3219 // at startup-time. These methods would accept arguments as
3302 3220 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3303 3221 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3304 3222 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3305 3223 // In practice, however, the # of lock sites is bounded and is usually small.
3306 3224 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP,
// since the helper routines would be called from multiple synchronization
// sites.
3310 3228 //
3311 3229 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3312 3230 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3313 3231 // to those specialized methods. That'd give us a mostly platform-independent
3314 3232 // implementation that the JITs could optimize and inline at their pleasure.
// Done correctly, the only time we'd need to cross to native code would be
3316 3234 // to park() or unpark() threads. We'd also need a few more unsafe operators
3317 3235 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3318 3236 // (b) explicit barriers or fence operations.
3319 3237 //
3320 3238 // TODO:
3321 3239 //
3322 3240 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3323 3241 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3324 3242 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
3325 3243 // the lock operators would typically be faster than reifying Self.
3326 3244 //
3327 3245 // * Ideally I'd define the primitives as:
3328 3246 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3329 3247 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3330 3248 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
3331 3249 // Instead, we're stuck with a rather awkward and brittle register assignments below.
3332 3250 // Furthermore the register assignments are overconstrained, possibly resulting in
3333 3251 // sub-optimal code near the synchronization site.
3334 3252 //
3335 3253 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
3336 3254 // Alternately, use a better sp-proximity test.
3337 3255 //
3338 3256 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3339 3257 // Either one is sufficient to uniquely identify a thread.
3340 3258 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3341 3259 //
3342 3260 // * Intrinsify notify() and notifyAll() for the common cases where the
3343 3261 // object is locked by the calling thread but the waitlist is empty.
3344 3262 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3345 3263 //
3346 3264 // * use jccb and jmpb instead of jcc and jmp to improve code density.
3347 3265 // But beware of excessive branch density on AMD Opterons.
3348 3266 //
3349 3267 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3350 3268 // or failure of the fast-path. If the fast-path fails then we pass
3351 3269 // control to the slow-path, typically in C. In Fast_Lock and
3352 3270 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3353 3271 // will emit a conditional branch immediately after the node.
3354 3272 // So we have branches to branches and lots of ICC.ZF games.
3355 3273 // Instead, it might be better to have C2 pass a "FailureLabel"
3356 3274 // into Fast_Lock and Fast_Unlock. In the case of success, control
3357 3275 // will drop through the node. ICC.ZF is undefined at exit.
3358 3276 // In the case of failure, the node will branch directly to the
3359 3277 // FailureLabel
3360 3278
3361 3279
  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  //
  // Inline monitor-enter fast path.  On exit ICC.ZF == 1 indicates the lock
  // was acquired and ZF == 0 forces control through the slow path (see the
  // protocol comments at DONE_LABEL below).  The EmitSync flag bits select
  // alternate experimental strategies; the final `else` arm is the default.
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock(); }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).  We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider:  LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3596 3514
3597 3515 // obj: object to unlock
3598 3516 // box: box address (displaced header location), killed. Must be EAX.
3599 3517 // rbx,: killed tmp; cannot be obj nor box.
3600 3518 //
3601 3519 // Some commentary on balanced locking:
3602 3520 //
3603 3521 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3604 3522 // Methods that don't have provably balanced locking are forced to run in the
3605 3523 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3606 3524 // The interpreter provides two properties:
3607 3525 // I1: At return-time the interpreter automatically and quietly unlocks any
3608 3526 // objects acquired the current activation (frame). Recall that the
3609 3527 // interpreter maintains an on-stack list of locks currently held by
3610 3528 // a frame.
//     I2:  If a method attempts to unlock an object that is not held by
//          the frame, the interpreter throws IMSX.
3613 3531 //
3614 3532 // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3615 3533 // B() doesn't have provably balanced locking so it runs in the interpreter.
3616 3534 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
3617 3535 // is still locked by A().
3618 3536 //
3619 3537 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
3620 3538 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3621 3539 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
3622 3540 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3623 3541
3624 3542 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3625 3543
3626 3544 Register objReg = as_Register($obj$$reg);
3627 3545 Register boxReg = as_Register($box$$reg);
3628 3546 Register tmpReg = as_Register($tmp$$reg);
3629 3547
3630 3548 guarantee (objReg != boxReg, "") ;
3631 3549 guarantee (objReg != tmpReg, "") ;
3632 3550 guarantee (boxReg != tmpReg, "") ;
3633 3551 guarantee (boxReg == as_Register(EAX_enc), "") ;
3634 3552 MacroAssembler masm(&cbuf);
3635 3553
3636 3554 if (EmitSync & 4) {
3637 3555 // Disable - inhibit all inlining. Force control through the slow-path
3638 3556 masm.cmpptr (rsp, 0) ;
3639 3557 } else
3640 3558 if (EmitSync & 8) {
3641 3559 Label DONE_LABEL ;
3642 3560 if (UseBiasedLocking) {
3643 3561 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3644 3562 }
3645 3563 // classic stack-locking code ...
3646 3564 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3647 3565 masm.testptr(tmpReg, tmpReg) ;
3648 3566 masm.jcc (Assembler::zero, DONE_LABEL) ;
3649 3567 if (os::is_MP()) { masm.lock(); }
3650 3568 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3651 3569 masm.bind(DONE_LABEL);
3652 3570 } else {
3653 3571 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3654 3572
3655 3573 // Critically, the biased locking test must have precedence over
3656 3574 // and appear before the (box->dhw == 0) recursive stack-lock test.
3657 3575 if (UseBiasedLocking && !UseOptoBiasInlining) {
3658 3576 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3659 3577 }
3660 3578
3661 3579 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
3662 3580 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
3663 3581 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
3664 3582
3665 3583 masm.testptr(tmpReg, 0x02) ; // Inflated?
3666 3584 masm.jccb (Assembler::zero, Stacked) ;
3667 3585
3668 3586 masm.bind (Inflated) ;
3669 3587 // It's inflated.
3670 3588 // Despite our balanced locking property we still check that m->_owner == Self
3671 3589 // as java routines or native JNI code called by this thread might
3672 3590 // have released the lock.
3673 3591 // Refer to the comments in synchronizer.cpp for how we might encode extra
3674 3592 // state in _succ so we can avoid fetching EntryList|cxq.
3675 3593 //
3676 3594 // I'd like to add more cases in fast_lock() and fast_unlock() --
3677 3595 // such as recursive enter and exit -- but we have to be wary of
3678 3596 // I$ bloat, T$ effects and BP$ effects.
3679 3597 //
3680 3598 // If there's no contention try a 1-0 exit. That is, exit without
3681 3599 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
3682 3600 // we detect and recover from the race that the 1-0 exit admits.
3683 3601 //
3684 3602 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3685 3603 // before it STs null into _owner, releasing the lock. Updates
3686 3604 // to data protected by the critical section must be visible before
3687 3605 // we drop the lock (and thus before any other thread could acquire
3688 3606 // the lock and observe the fields protected by the lock).
3689 3607 // IA32's memory-model is SPO, so STs are ordered with respect to
3690 3608 // each other and there's no need for an explicit barrier (fence).
3691 3609 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3692 3610
3693 3611 masm.get_thread (boxReg) ;
3694 3612 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3695 3613 // prefetchw [ebx + Offset(_owner)-2]
3696 3614 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3697 3615 }
3698 3616
3699 3617 // Note that we could employ various encoding schemes to reduce
3700 3618 // the number of loads below (currently 4) to just 2 or 3.
3701 3619 // Refer to the comments in synchronizer.cpp.
3702 3620 // In practice the chain of fetches doesn't seem to impact performance, however.
3703 3621 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3704 3622 // Attempt to reduce branch density - AMD's branch predictor.
3705 3623 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3706 3624 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3707 3625 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3708 3626 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3709 3627 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3710 3628 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3711 3629 masm.jmpb (DONE_LABEL) ;
3712 3630 } else {
3713 3631 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3714 3632 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3715 3633 masm.jccb (Assembler::notZero, DONE_LABEL) ;
3716 3634 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
3717 3635 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
3718 3636 masm.jccb (Assembler::notZero, CheckSucc) ;
3719 3637 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3720 3638 masm.jmpb (DONE_LABEL) ;
3721 3639 }
3722 3640
3723 3641 // The following code fragment (EmitSync & 65536) improves the performance of
3724 3642 // contended applications and contended synchronization microbenchmarks.
3725 3643 // Unfortunately the emission of the code - even though not executed - causes regressions
3726 3644 // in scimark and jetstream, evidently because of $ effects. Replacing the code
3727 3645 // with an equal number of never-executed NOPs results in the same regression.
3728 3646 // We leave it off by default.
3729 3647
3730 3648 if ((EmitSync & 65536) != 0) {
3731 3649 Label LSuccess, LGoSlowPath ;
3732 3650
3733 3651 masm.bind (CheckSucc) ;
3734 3652
3735 3653 // Optional pre-test ... it's safe to elide this
3736 3654 if ((EmitSync & 16) == 0) {
3737 3655 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3738 3656 masm.jccb (Assembler::zero, LGoSlowPath) ;
3739 3657 }
3740 3658
3741 3659 // We have a classic Dekker-style idiom:
3742 3660 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
3743 3661 // There are a number of ways to implement the barrier:
3744 3662 // (1) lock:andl &m->_owner, 0
3745 3663 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3746 3664 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3747 3665 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3748 3666 // (2) If supported, an explicit MFENCE is appealing.
3749 3667 // In older IA32 processors MFENCE is slower than lock:add or xchg
3750 3668 // particularly if the write-buffer is full as might be the case if
3751 3669 // if stores closely precede the fence or fence-equivalent instruction.
3752 3670 // In more modern implementations MFENCE appears faster, however.
3753 3671 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3754 3672 // The $lines underlying the top-of-stack should be in M-state.
3755 3673 // The locked add instruction is serializing, of course.
3756 3674 // (4) Use xchg, which is serializing
3757 3675 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3758 3676 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3759 3677 // The integer condition codes will tell us if succ was 0.
3760 3678 // Since _succ and _owner should reside in the same $line and
3761 3679 // we just stored into _owner, it's likely that the $line
3762 3680 // remains in M-state for the lock:orl.
3763 3681 //
3764 3682 // We currently use (3), although it's likely that switching to (2)
3765 3683 // is correct for the future.
3766 3684
3767 3685 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
3768 3686 if (os::is_MP()) {
3769 3687 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
3770 3688 masm.mfence();
3771 3689 } else {
3772 3690 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
3773 3691 }
3774 3692 }
3775 3693 // Ratify _succ remains non-null
3776 3694 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
3777 3695 masm.jccb (Assembler::notZero, LSuccess) ;
3778 3696
3779 3697 masm.xorptr(boxReg, boxReg) ; // box is really EAX
3780 3698 if (os::is_MP()) { masm.lock(); }
3781 3699 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3782 3700 masm.jccb (Assembler::notEqual, LSuccess) ;
3783 3701 // Since we're low on registers we installed rsp as a placeholding in _owner.
3784 3702 // Now install Self over rsp. This is safe as we're transitioning from
3785 3703 // non-null to non-null
3786 3704 masm.get_thread (boxReg) ;
3787 3705 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3788 3706 // Intentional fall-through into LGoSlowPath ...
3789 3707
3790 3708 masm.bind (LGoSlowPath) ;
3791 3709 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
3792 3710 masm.jmpb (DONE_LABEL) ;
3793 3711
3794 3712 masm.bind (LSuccess) ;
3795 3713 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
3796 3714 masm.jmpb (DONE_LABEL) ;
3797 3715 }
3798 3716
3799 3717 masm.bind (Stacked) ;
3800 3718 // It's not inflated and it's not recursively stack-locked and it's not biased.
3801 3719 // It must be stack-locked.
3802 3720 // Try to reset the header to displaced header.
3803 3721 // The "box" value on the stack is stable, so we can reload
3804 3722 // and be assured we observe the same value as above.
3805 3723 masm.movptr(tmpReg, Address(boxReg, 0)) ;
3806 3724 if (os::is_MP()) { masm.lock(); }
3807 3725 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3808 3726 // Intentional fall-through into DONE_LABEL
3809 3727
3810 3728
3811 3729 // DONE_LABEL is a hot target - we'd really like to place it at the
3812 3730 // start of cache line by padding with NOPs.
3813 3731 // See the AMD and Intel software optimization manuals for the
3814 3732 // most efficient "long" NOP encodings.
3815 3733 // Unfortunately none of our alignment mechanisms suffice.
3816 3734 if ((EmitSync & 65536) == 0) {
3817 3735 masm.bind (CheckSucc) ;
3818 3736 }
3819 3737 masm.bind(DONE_LABEL);
3820 3738
3821 3739 // Avoid branch to branch on AMD processors
3822 3740 if (EmitSync & 32768) { masm.nop() ; }
3823 3741 }
3824 3742 %}
3825 3743
3826 3744
3827 3745 enc_class enc_pop_rdx() %{
3828 3746 emit_opcode(cbuf,0x5A);
3829 3747 %}
3830 3748
3831 3749 enc_class enc_rethrow() %{
3832 3750 cbuf.set_insts_mark();
3833 3751 emit_opcode(cbuf, 0xE9); // jmp entry
3834 3752 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3835 3753 runtime_call_Relocation::spec(), RELOC_IMM32 );
3836 3754 %}
3837 3755
3838 3756
3839 3757 // Convert a double to an int. Java semantics require we do complex
3840 3758 // manglelations in the corner cases. So we set the rounding mode to
3841 3759 // 'zero', store the darned double down as an int, and reset the
3842 3760 // rounding mode to 'nearest'. The hardware throws an exception which
3843 3761 // patches up the correct value directly to the stack.
3844 3762 enc_class D2I_encoding( regD src ) %{
3845 3763 // Flip to round-to-zero mode. We attempted to allow invalid-op
3846 3764 // exceptions here, so that a NAN or other corner-case value will
3847 3765 // thrown an exception (but normal values get converted at full speed).
3848 3766 // However, I2C adapters and other float-stack manglers leave pending
3849 3767 // invalid-op exceptions hanging. We would have to clear them before
3850 3768 // enabling them and that is more expensive than just testing for the
3851 3769 // invalid value Intel stores down in the corner cases.
3852 3770 emit_opcode(cbuf,0xD9); // FLDCW trunc
3853 3771 emit_opcode(cbuf,0x2D);
3854 3772 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3855 3773 // Allocate a word
3856 3774 emit_opcode(cbuf,0x83); // SUB ESP,4
3857 3775 emit_opcode(cbuf,0xEC);
3858 3776 emit_d8(cbuf,0x04);
3859 3777 // Encoding assumes a double has been pushed into FPR0.
3860 3778 // Store down the double as an int, popping the FPU stack
3861 3779 emit_opcode(cbuf,0xDB); // FISTP [ESP]
3862 3780 emit_opcode(cbuf,0x1C);
3863 3781 emit_d8(cbuf,0x24);
3864 3782 // Restore the rounding mode; mask the exception
3865 3783 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3866 3784 emit_opcode(cbuf,0x2D);
3867 3785 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3868 3786 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3869 3787 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3870 3788
3871 3789 // Load the converted int; adjust CPU stack
3872 3790 emit_opcode(cbuf,0x58); // POP EAX
3873 3791 emit_opcode(cbuf,0x3D); // CMP EAX,imm
3874 3792 emit_d32 (cbuf,0x80000000); // 0x80000000
3875 3793 emit_opcode(cbuf,0x75); // JNE around_slow_call
3876 3794 emit_d8 (cbuf,0x07); // Size of slow_call
3877 3795 // Push src onto stack slow-path
3878 3796 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3879 3797 emit_d8 (cbuf,0xC0-1+$src$$reg );
3880 3798 // CALL directly to the runtime
3881 3799 cbuf.set_insts_mark();
3882 3800 emit_opcode(cbuf,0xE8); // Call into runtime
3883 3801 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3884 3802 // Carry on here...
3885 3803 %}
3886 3804
3887 3805 enc_class D2L_encoding( regD src ) %{
3888 3806 emit_opcode(cbuf,0xD9); // FLDCW trunc
3889 3807 emit_opcode(cbuf,0x2D);
3890 3808 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3891 3809 // Allocate a word
3892 3810 emit_opcode(cbuf,0x83); // SUB ESP,8
3893 3811 emit_opcode(cbuf,0xEC);
3894 3812 emit_d8(cbuf,0x08);
3895 3813 // Encoding assumes a double has been pushed into FPR0.
3896 3814 // Store down the double as a long, popping the FPU stack
3897 3815 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3898 3816 emit_opcode(cbuf,0x3C);
3899 3817 emit_d8(cbuf,0x24);
3900 3818 // Restore the rounding mode; mask the exception
3901 3819 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3902 3820 emit_opcode(cbuf,0x2D);
3903 3821 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3904 3822 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3905 3823 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3906 3824
3907 3825 // Load the converted int; adjust CPU stack
3908 3826 emit_opcode(cbuf,0x58); // POP EAX
3909 3827 emit_opcode(cbuf,0x5A); // POP EDX
3910 3828 emit_opcode(cbuf,0x81); // CMP EDX,imm
3911 3829 emit_d8 (cbuf,0xFA); // rdx
3912 3830 emit_d32 (cbuf,0x80000000); // 0x80000000
3913 3831 emit_opcode(cbuf,0x75); // JNE around_slow_call
3914 3832 emit_d8 (cbuf,0x07+4); // Size of slow_call
3915 3833 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3916 3834 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3917 3835 emit_opcode(cbuf,0x75); // JNE around_slow_call
3918 3836 emit_d8 (cbuf,0x07); // Size of slow_call
3919 3837 // Push src onto stack slow-path
3920 3838 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3921 3839 emit_d8 (cbuf,0xC0-1+$src$$reg );
3922 3840 // CALL directly to the runtime
3923 3841 cbuf.set_insts_mark();
3924 3842 emit_opcode(cbuf,0xE8); // Call into runtime
3925 3843 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3926 3844 // Carry on here...
3927 3845 %}
3928 3846
3929 3847 enc_class X2L_encoding( regX src ) %{
3930 3848 // Allocate a word
3931 3849 emit_opcode(cbuf,0x83); // SUB ESP,8
3932 3850 emit_opcode(cbuf,0xEC);
3933 3851 emit_d8(cbuf,0x08);
3934 3852
3935 3853 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3936 3854 emit_opcode (cbuf, 0x0F );
3937 3855 emit_opcode (cbuf, 0x11 );
3938 3856 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 3857
3940 3858 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3941 3859 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 3860
3943 3861 emit_opcode(cbuf,0xD9); // FLDCW trunc
3944 3862 emit_opcode(cbuf,0x2D);
3945 3863 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3946 3864
3947 3865 // Encoding assumes a double has been pushed into FPR0.
3948 3866 // Store down the double as a long, popping the FPU stack
3949 3867 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3950 3868 emit_opcode(cbuf,0x3C);
3951 3869 emit_d8(cbuf,0x24);
3952 3870
3953 3871 // Restore the rounding mode; mask the exception
3954 3872 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3955 3873 emit_opcode(cbuf,0x2D);
3956 3874 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3957 3875 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3958 3876 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3959 3877
3960 3878 // Load the converted int; adjust CPU stack
3961 3879 emit_opcode(cbuf,0x58); // POP EAX
3962 3880
3963 3881 emit_opcode(cbuf,0x5A); // POP EDX
3964 3882
3965 3883 emit_opcode(cbuf,0x81); // CMP EDX,imm
3966 3884 emit_d8 (cbuf,0xFA); // rdx
3967 3885 emit_d32 (cbuf,0x80000000);// 0x80000000
3968 3886
3969 3887 emit_opcode(cbuf,0x75); // JNE around_slow_call
3970 3888 emit_d8 (cbuf,0x13+4); // Size of slow_call
3971 3889
3972 3890 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3973 3891 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3974 3892
3975 3893 emit_opcode(cbuf,0x75); // JNE around_slow_call
3976 3894 emit_d8 (cbuf,0x13); // Size of slow_call
3977 3895
3978 3896 // Allocate a word
3979 3897 emit_opcode(cbuf,0x83); // SUB ESP,4
3980 3898 emit_opcode(cbuf,0xEC);
3981 3899 emit_d8(cbuf,0x04);
3982 3900
3983 3901 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3984 3902 emit_opcode (cbuf, 0x0F );
3985 3903 emit_opcode (cbuf, 0x11 );
3986 3904 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3987 3905
3988 3906 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3989 3907 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3990 3908
3991 3909 emit_opcode(cbuf,0x83); // ADD ESP,4
3992 3910 emit_opcode(cbuf,0xC4);
3993 3911 emit_d8(cbuf,0x04);
3994 3912
3995 3913 // CALL directly to the runtime
3996 3914 cbuf.set_insts_mark();
3997 3915 emit_opcode(cbuf,0xE8); // Call into runtime
3998 3916 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3999 3917 // Carry on here...
4000 3918 %}
4001 3919
4002 3920 enc_class XD2L_encoding( regXD src ) %{
4003 3921 // Allocate a word
4004 3922 emit_opcode(cbuf,0x83); // SUB ESP,8
4005 3923 emit_opcode(cbuf,0xEC);
4006 3924 emit_d8(cbuf,0x08);
4007 3925
4008 3926 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4009 3927 emit_opcode (cbuf, 0x0F );
4010 3928 emit_opcode (cbuf, 0x11 );
4011 3929 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4012 3930
4013 3931 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4014 3932 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4015 3933
4016 3934 emit_opcode(cbuf,0xD9); // FLDCW trunc
4017 3935 emit_opcode(cbuf,0x2D);
4018 3936 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4019 3937
4020 3938 // Encoding assumes a double has been pushed into FPR0.
4021 3939 // Store down the double as a long, popping the FPU stack
4022 3940 emit_opcode(cbuf,0xDF); // FISTP [ESP]
4023 3941 emit_opcode(cbuf,0x3C);
4024 3942 emit_d8(cbuf,0x24);
4025 3943
4026 3944 // Restore the rounding mode; mask the exception
4027 3945 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
4028 3946 emit_opcode(cbuf,0x2D);
4029 3947 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4030 3948 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4031 3949 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4032 3950
4033 3951 // Load the converted int; adjust CPU stack
4034 3952 emit_opcode(cbuf,0x58); // POP EAX
4035 3953
4036 3954 emit_opcode(cbuf,0x5A); // POP EDX
4037 3955
4038 3956 emit_opcode(cbuf,0x81); // CMP EDX,imm
4039 3957 emit_d8 (cbuf,0xFA); // rdx
4040 3958 emit_d32 (cbuf,0x80000000); // 0x80000000
4041 3959
4042 3960 emit_opcode(cbuf,0x75); // JNE around_slow_call
4043 3961 emit_d8 (cbuf,0x13+4); // Size of slow_call
4044 3962
4045 3963 emit_opcode(cbuf,0x85); // TEST EAX,EAX
4046 3964 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
4047 3965
4048 3966 emit_opcode(cbuf,0x75); // JNE around_slow_call
4049 3967 emit_d8 (cbuf,0x13); // Size of slow_call
4050 3968
4051 3969 // Push src onto stack slow-path
4052 3970 // Allocate a word
4053 3971 emit_opcode(cbuf,0x83); // SUB ESP,8
4054 3972 emit_opcode(cbuf,0xEC);
4055 3973 emit_d8(cbuf,0x08);
4056 3974
4057 3975 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
4058 3976 emit_opcode (cbuf, 0x0F );
4059 3977 emit_opcode (cbuf, 0x11 );
4060 3978 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4061 3979
4062 3980 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
4063 3981 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4064 3982
4065 3983 emit_opcode(cbuf,0x83); // ADD ESP,8
4066 3984 emit_opcode(cbuf,0xC4);
4067 3985 emit_d8(cbuf,0x08);
4068 3986
4069 3987 // CALL directly to the runtime
4070 3988 cbuf.set_insts_mark();
4071 3989 emit_opcode(cbuf,0xE8); // Call into runtime
4072 3990 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4073 3991 // Carry on here...
4074 3992 %}
4075 3993
4076 3994 enc_class D2X_encoding( regX dst, regD src ) %{
4077 3995 // Allocate a word
4078 3996 emit_opcode(cbuf,0x83); // SUB ESP,4
4079 3997 emit_opcode(cbuf,0xEC);
4080 3998 emit_d8(cbuf,0x04);
4081 3999 int pop = 0x02;
4082 4000 if ($src$$reg != FPR1L_enc) {
4083 4001 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4084 4002 emit_d8( cbuf, 0xC0-1+$src$$reg );
4085 4003 pop = 0x03;
4086 4004 }
4087 4005 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4088 4006
4089 4007 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4090 4008 emit_opcode (cbuf, 0x0F );
4091 4009 emit_opcode (cbuf, 0x10 );
4092 4010 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4093 4011
4094 4012 emit_opcode(cbuf,0x83); // ADD ESP,4
4095 4013 emit_opcode(cbuf,0xC4);
4096 4014 emit_d8(cbuf,0x04);
4097 4015 // Carry on here...
4098 4016 %}
4099 4017
4100 4018 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4101 4019 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4102 4020
4103 4021 // Compare the result to see if we need to go to the slow path
4104 4022 emit_opcode(cbuf,0x81); // CMP dst,imm
4105 4023 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4106 4024 emit_d32 (cbuf,0x80000000); // 0x80000000
4107 4025
4108 4026 emit_opcode(cbuf,0x75); // JNE around_slow_call
4109 4027 emit_d8 (cbuf,0x13); // Size of slow_call
4110 4028 // Store xmm to a temp memory
4111 4029 // location and push it onto stack.
4112 4030
4113 4031 emit_opcode(cbuf,0x83); // SUB ESP,4
4114 4032 emit_opcode(cbuf,0xEC);
4115 4033 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4116 4034
4117 4035 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
4118 4036 emit_opcode (cbuf, 0x0F );
4119 4037 emit_opcode (cbuf, 0x11 );
4120 4038 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4121 4039
4122 4040 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4123 4041 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4124 4042
4125 4043 emit_opcode(cbuf,0x83); // ADD ESP,4
4126 4044 emit_opcode(cbuf,0xC4);
4127 4045 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4128 4046
4129 4047 // CALL directly to the runtime
4130 4048 cbuf.set_insts_mark();
4131 4049 emit_opcode(cbuf,0xE8); // Call into runtime
4132 4050 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4133 4051
4134 4052 // Carry on here...
4135 4053 %}
4136 4054
4137 4055 enc_class X2D_encoding( regD dst, regX src ) %{
4138 4056 // Allocate a word
4139 4057 emit_opcode(cbuf,0x83); // SUB ESP,4
4140 4058 emit_opcode(cbuf,0xEC);
4141 4059 emit_d8(cbuf,0x04);
4142 4060
4143 4061 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4144 4062 emit_opcode (cbuf, 0x0F );
4145 4063 emit_opcode (cbuf, 0x11 );
4146 4064 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4147 4065
4148 4066 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4149 4067 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4150 4068
4151 4069 emit_opcode(cbuf,0x83); // ADD ESP,4
4152 4070 emit_opcode(cbuf,0xC4);
4153 4071 emit_d8(cbuf,0x04);
4154 4072
4155 4073 // Carry on here...
4156 4074 %}
4157 4075
4158 4076 enc_class AbsXF_encoding(regX dst) %{
4159 4077 address signmask_address=(address)float_signmask_pool;
4160 4078 // andpd:\tANDPS $dst,[signconst]
4161 4079 emit_opcode(cbuf, 0x0F);
4162 4080 emit_opcode(cbuf, 0x54);
4163 4081 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4164 4082 emit_d32(cbuf, (int)signmask_address);
4165 4083 %}
4166 4084
4167 4085 enc_class AbsXD_encoding(regXD dst) %{
4168 4086 address signmask_address=(address)double_signmask_pool;
4169 4087 // andpd:\tANDPD $dst,[signconst]
4170 4088 emit_opcode(cbuf, 0x66);
4171 4089 emit_opcode(cbuf, 0x0F);
4172 4090 emit_opcode(cbuf, 0x54);
4173 4091 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4174 4092 emit_d32(cbuf, (int)signmask_address);
4175 4093 %}
4176 4094
4177 4095 enc_class NegXF_encoding(regX dst) %{
4178 4096 address signmask_address=(address)float_signflip_pool;
4179 4097 // andpd:\tXORPS $dst,[signconst]
4180 4098 emit_opcode(cbuf, 0x0F);
4181 4099 emit_opcode(cbuf, 0x57);
4182 4100 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4183 4101 emit_d32(cbuf, (int)signmask_address);
4184 4102 %}
4185 4103
4186 4104 enc_class NegXD_encoding(regXD dst) %{
4187 4105 address signmask_address=(address)double_signflip_pool;
4188 4106 // andpd:\tXORPD $dst,[signconst]
4189 4107 emit_opcode(cbuf, 0x66);
4190 4108 emit_opcode(cbuf, 0x0F);
4191 4109 emit_opcode(cbuf, 0x57);
4192 4110 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4193 4111 emit_d32(cbuf, (int)signmask_address);
4194 4112 %}
4195 4113
4196 4114 enc_class FMul_ST_reg( eRegF src1 ) %{
4197 4115 // Operand was loaded from memory into fp ST (stack top)
4198 4116 // FMUL ST,$src /* D8 C8+i */
4199 4117 emit_opcode(cbuf, 0xD8);
4200 4118 emit_opcode(cbuf, 0xC8 + $src1$$reg);
4201 4119 %}
4202 4120
4203 4121 enc_class FAdd_ST_reg( eRegF src2 ) %{
4204 4122 // FADDP ST,src2 /* D8 C0+i */
4205 4123 emit_opcode(cbuf, 0xD8);
4206 4124 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4207 4125 //could use FADDP src2,fpST /* DE C0+i */
4208 4126 %}
4209 4127
4210 4128 enc_class FAddP_reg_ST( eRegF src2 ) %{
4211 4129 // FADDP src2,ST /* DE C0+i */
4212 4130 emit_opcode(cbuf, 0xDE);
4213 4131 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4214 4132 %}
4215 4133
4216 4134 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4217 4135 // Operand has been loaded into fp ST (stack top)
4218 4136 // FSUB ST,$src1
4219 4137 emit_opcode(cbuf, 0xD8);
4220 4138 emit_opcode(cbuf, 0xE0 + $src1$$reg);
4221 4139
4222 4140 // FDIV
4223 4141 emit_opcode(cbuf, 0xD8);
4224 4142 emit_opcode(cbuf, 0xF0 + $src2$$reg);
4225 4143 %}
4226 4144
4227 4145 enc_class MulFAddF (eRegF src1, eRegF src2) %{
4228 4146 // Operand was loaded from memory into fp ST (stack top)
4229 4147 // FADD ST,$src /* D8 C0+i */
4230 4148 emit_opcode(cbuf, 0xD8);
4231 4149 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4232 4150
4233 4151 // FMUL ST,src2 /* D8 C*+i */
4234 4152 emit_opcode(cbuf, 0xD8);
4235 4153 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4236 4154 %}
4237 4155
4238 4156
4239 4157 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4240 4158 // Operand was loaded from memory into fp ST (stack top)
4241 4159 // FADD ST,$src /* D8 C0+i */
4242 4160 emit_opcode(cbuf, 0xD8);
4243 4161 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4244 4162
4245 4163 // FMULP src2,ST /* DE C8+i */
4246 4164 emit_opcode(cbuf, 0xDE);
4247 4165 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4248 4166 %}
4249 4167
4250 4168 // Atomically load the volatile long
4251 4169 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4252 4170 emit_opcode(cbuf,0xDF);
4253 4171 int rm_byte_opcode = 0x05;
4254 4172 int base = $mem$$base;
4255 4173 int index = $mem$$index;
4256 4174 int scale = $mem$$scale;
4257 4175 int displace = $mem$$disp;
4258 4176 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4259 4177 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4260 4178 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
4261 4179 %}
4262 4180
4263 4181 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4264 4182 { // Atomic long load
4265 4183 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4266 4184 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4267 4185 emit_opcode(cbuf,0x0F);
4268 4186 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4269 4187 int base = $mem$$base;
4270 4188 int index = $mem$$index;
4271 4189 int scale = $mem$$scale;
4272 4190 int displace = $mem$$disp;
4273 4191 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274 4192 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275 4193 }
4276 4194 { // MOVSD $dst,$tmp ! atomic long store
4277 4195 emit_opcode(cbuf,0xF2);
4278 4196 emit_opcode(cbuf,0x0F);
4279 4197 emit_opcode(cbuf,0x11);
4280 4198 int base = $dst$$base;
4281 4199 int index = $dst$$index;
4282 4200 int scale = $dst$$scale;
4283 4201 int displace = $dst$$disp;
4284 4202 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4285 4203 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4286 4204 }
4287 4205 %}
4288 4206
4289 4207 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4290 4208 { // Atomic long load
4291 4209 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4292 4210 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4293 4211 emit_opcode(cbuf,0x0F);
4294 4212 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4295 4213 int base = $mem$$base;
4296 4214 int index = $mem$$index;
4297 4215 int scale = $mem$$scale;
4298 4216 int displace = $mem$$disp;
4299 4217 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4300 4218 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4301 4219 }
4302 4220 { // MOVD $dst.lo,$tmp
4303 4221 emit_opcode(cbuf,0x66);
4304 4222 emit_opcode(cbuf,0x0F);
4305 4223 emit_opcode(cbuf,0x7E);
4306 4224 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4307 4225 }
4308 4226 { // PSRLQ $tmp,32
4309 4227 emit_opcode(cbuf,0x66);
4310 4228 emit_opcode(cbuf,0x0F);
4311 4229 emit_opcode(cbuf,0x73);
4312 4230 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4313 4231 emit_d8(cbuf, 0x20);
4314 4232 }
4315 4233 { // MOVD $dst.hi,$tmp
4316 4234 emit_opcode(cbuf,0x66);
4317 4235 emit_opcode(cbuf,0x0F);
4318 4236 emit_opcode(cbuf,0x7E);
4319 4237 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4320 4238 }
4321 4239 %}
4322 4240
4323 4241 // Volatile Store Long. Must be atomic, so move it into
4324 4242 // the FP TOS and then do a 64-bit FIST. Has to probe the
4325 4243 // target address before the store (for null-ptr checks)
4326 4244 // so the memory operand is used twice in the encoding.
4327 4245 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4328 4246 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
4329 4247 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
4330 4248 emit_opcode(cbuf,0xDF);
4331 4249 int rm_byte_opcode = 0x07;
4332 4250 int base = $mem$$base;
4333 4251 int index = $mem$$index;
4334 4252 int scale = $mem$$scale;
4335 4253 int displace = $mem$$disp;
4336 4254 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4337 4255 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4338 4256 %}
4339 4257
4340 4258 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4341 4259 { // Atomic long load
4342 4260 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4343 4261 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4344 4262 emit_opcode(cbuf,0x0F);
4345 4263 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4346 4264 int base = $src$$base;
4347 4265 int index = $src$$index;
4348 4266 int scale = $src$$scale;
4349 4267 int displace = $src$$disp;
4350 4268 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4351 4269 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4352 4270 }
4353 4271 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4354 4272 { // MOVSD $mem,$tmp ! atomic long store
4355 4273 emit_opcode(cbuf,0xF2);
4356 4274 emit_opcode(cbuf,0x0F);
4357 4275 emit_opcode(cbuf,0x11);
4358 4276 int base = $mem$$base;
4359 4277 int index = $mem$$index;
4360 4278 int scale = $mem$$scale;
4361 4279 int displace = $mem$$disp;
4362 4280 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4363 4281 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4364 4282 }
4365 4283 %}
4366 4284
4367 4285 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4368 4286 { // MOVD $tmp,$src.lo
4369 4287 emit_opcode(cbuf,0x66);
4370 4288 emit_opcode(cbuf,0x0F);
4371 4289 emit_opcode(cbuf,0x6E);
4372 4290 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4373 4291 }
4374 4292 { // MOVD $tmp2,$src.hi
4375 4293 emit_opcode(cbuf,0x66);
4376 4294 emit_opcode(cbuf,0x0F);
4377 4295 emit_opcode(cbuf,0x6E);
4378 4296 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4379 4297 }
4380 4298 { // PUNPCKLDQ $tmp,$tmp2
4381 4299 emit_opcode(cbuf,0x66);
4382 4300 emit_opcode(cbuf,0x0F);
4383 4301 emit_opcode(cbuf,0x62);
4384 4302 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4385 4303 }
4386 4304 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4387 4305 { // MOVSD $mem,$tmp ! atomic long store
4388 4306 emit_opcode(cbuf,0xF2);
4389 4307 emit_opcode(cbuf,0x0F);
4390 4308 emit_opcode(cbuf,0x11);
4391 4309 int base = $mem$$base;
4392 4310 int index = $mem$$index;
4393 4311 int scale = $mem$$scale;
4394 4312 int displace = $mem$$disp;
4395 4313 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4396 4314 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4397 4315 }
4398 4316 %}
4399 4317
4400 4318 // Safepoint Poll. This polls the safepoint page, and causes an
4401 4319 // exception if it is not readable. Unfortunately, it kills the condition code
4402 4320 // in the process
4403 4321 // We current use TESTL [spp],EDI
4404 4322 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4405 4323
4406 4324 enc_class Safepoint_Poll() %{
4407 4325 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
4408 4326 emit_opcode(cbuf,0x85);
4409 4327 emit_rm (cbuf, 0x0, 0x7, 0x5);
4410 4328 emit_d32(cbuf, (intptr_t)os::get_polling_page());
4411 4329 %}
4412 4330 %}
4413 4331
4414 4332
4415 4333 //----------FRAME--------------------------------------------------------------
4416 4334 // Definition of frame structure and management information.
4417 4335 //
4418 4336 // S T A C K L A Y O U T Allocators stack-slot number
4419 4337 // | (to get allocators register number
4420 4338 // G Owned by | | v add OptoReg::stack0())
4421 4339 // r CALLER | |
4422 4340 // o | +--------+ pad to even-align allocators stack-slot
4423 4341 // w V | pad0 | numbers; owned by CALLER
4424 4342 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4425 4343 // h ^ | in | 5
4426 4344 // | | args | 4 Holes in incoming args owned by SELF
4427 4345 // | | | | 3
4428 4346 // | | +--------+
4429 4347 // V | | old out| Empty on Intel, window on Sparc
4430 4348 // | old |preserve| Must be even aligned.
4431 4349 // | SP-+--------+----> Matcher::_old_SP, even aligned
4432 4350 // | | in | 3 area for Intel ret address
4433 4351 // Owned by |preserve| Empty on Sparc.
4434 4352 // SELF +--------+
4435 4353 // | | pad2 | 2 pad to align old SP
4436 4354 // | +--------+ 1
4437 4355 // | | locks | 0
4438 4356 // | +--------+----> OptoReg::stack0(), even aligned
4439 4357 // | | pad1 | 11 pad to align new SP
4440 4358 // | +--------+
4441 4359 // | | | 10
4442 4360 // | | spills | 9 spills
4443 4361 // V | | 8 (pad0 slot for callee)
4444 4362 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4445 4363 // ^ | out | 7
4446 4364 // | | args | 6 Holes in outgoing args owned by CALLEE
4447 4365 // Owned by +--------+
4448 4366 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4449 4367 // | new |preserve| Must be even-aligned.
4450 4368 // | SP-+--------+----> Matcher::_new_SP, even aligned
4451 4369 // | | |
4452 4370 //
4453 4371 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4454 4372 // known from SELF's arguments and the Java calling convention.
4455 4373 // Region 6-7 is determined per call site.
4456 4374 // Note 2: If the calling convention leaves holes in the incoming argument
4457 4375 // area, those holes are owned by SELF. Holes in the outgoing area
4458 4376 // are owned by the CALLEE. Holes should not be necessary in the
4459 4377 // incoming area, as the Java calling convention is completely under
4460 4378 // the control of the AD file. Doubles can be sorted and packed to
4461 4379 // avoid holes. Holes in the outgoing arguments may be necessary for
4462 4380 // varargs C calling conventions.
4463 4381 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4464 4382 // even aligned with pad0 as needed.
4465 4383 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4466 4384 // region 6-11 is even aligned; it may be padded out more so that
4467 4385 // the region from SP to FP meets the minimum stack alignment.
4468 4386
4469 4387 frame %{
4470 4388 // What direction does stack grow in (assumed to be same for C & Java)
4471 4389 stack_direction(TOWARDS_LOW);
4472 4390
4473 4391 // These three registers define part of the calling convention
4474 4392 // between compiled code and the interpreter.
4475 4393 inline_cache_reg(EAX); // Inline Cache Register
4476 4394 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter
4477 4395
4478 4396 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4479 4397 cisc_spilling_operand_name(indOffset32);
4480 4398
4481 4399 // Number of stack slots consumed by locking an object
4482 4400 sync_stack_slots(1);
4483 4401
4484 4402 // Compiled code's Frame Pointer
4485 4403 frame_pointer(ESP);
4486 4404 // Interpreter stores its frame pointer in a register which is
4487 4405 // stored to the stack by I2CAdaptors.
4488 4406 // I2CAdaptors convert from interpreted java to compiled java.
4489 4407 interpreter_frame_pointer(EBP);
4490 4408
4491 4409 // Stack alignment requirement
4492 4410 // Alignment size in bytes (128-bit -> 16 bytes)
4493 4411 stack_alignment(StackAlignmentInBytes);
4494 4412
4495 4413 // Number of stack slots between incoming argument block and the start of
4496 4414 // a new frame. The PROLOG must add this many slots to the stack. The
4497 4415 // EPILOG must remove this many slots. Intel needs one slot for
4498 4416 // return address and one for rbp, (must save rbp)
4499 4417 in_preserve_stack_slots(2+VerifyStackAtCalls);
4500 4418
4501 4419 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4502 4420 // for calls to C. Supports the var-args backing area for register parms.
4503 4421 varargs_C_out_slots_killed(0);
4504 4422
4505 4423 // The after-PROLOG location of the return address. Location of
4506 4424 // return address specifies a type (REG or STACK) and a number
4507 4425 // representing the register number (i.e. - use a register name) or
4508 4426 // stack slot.
4509 4427 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4510 4428 // Otherwise, it is above the locks and verification slot and alignment word
4511 4429 return_addr(STACK - 1 +
4512 4430 round_to(1+VerifyStackAtCalls+
4513 4431 Compile::current()->fixed_slots(),
4514 4432 (StackAlignmentInBytes/wordSize)));
4515 4433
4516 4434 // Body of function which returns an integer array locating
4517 4435 // arguments either in registers or in stack slots. Passed an array
4518 4436 // of ideal registers called "sig" and a "length" count. Stack-slot
4519 4437 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4520 4438 // arguments for a CALLEE. Incoming stack arguments are
4521 4439 // automatically biased by the preserve_stack_slots field above.
4522 4440 calling_convention %{
4523 4441 // No difference between ingoing/outgoing just pass false
4524 4442 SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4525 4443 %}
4526 4444
4527 4445
4528 4446 // Body of function which returns an integer array locating
4529 4447 // arguments either in registers or in stack slots. Passed an array
4530 4448 // of ideal registers called "sig" and a "length" count. Stack-slot
4531 4449 // offsets are based on outgoing arguments, i.e. a CALLER setting up
4532 4450 // arguments for a CALLEE. Incoming stack arguments are
4533 4451 // automatically biased by the preserve_stack_slots field above.
4534 4452 c_calling_convention %{
4535 4453 // This is obviously always outgoing
4536 4454 (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4537 4455 %}
4538 4456
4539 4457 // Location of C & interpreter return values
4540 4458 c_return_value %{
4541 4459 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4542 4460 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4543 4461 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4544 4462
4545 4463 // in SSE2+ mode we want to keep the FPU stack clean so pretend
4546 4464 // that C functions return float and double results in XMM0.
4547 4465 if( ideal_reg == Op_RegD && UseSSE>=2 )
4548 4466 return OptoRegPair(XMM0b_num,XMM0a_num);
4549 4467 if( ideal_reg == Op_RegF && UseSSE>=2 )
4550 4468 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4551 4469
4552 4470 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4553 4471 %}
4554 4472
4555 4473 // Location of return values
4556 4474 return_value %{
4557 4475 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4558 4476 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
4559 4477 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4560 4478 if( ideal_reg == Op_RegD && UseSSE>=2 )
4561 4479 return OptoRegPair(XMM0b_num,XMM0a_num);
4562 4480 if( ideal_reg == Op_RegF && UseSSE>=1 )
4563 4481 return OptoRegPair(OptoReg::Bad,XMM0a_num);
4564 4482 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4565 4483 %}
4566 4484
4567 4485 %}
4568 4486
4569 4487 //----------ATTRIBUTES---------------------------------------------------------
4570 4488 //----------Operand Attributes-------------------------------------------------
4571 4489 op_attrib op_cost(0); // Required cost attribute
4572 4490
4573 4491 //----------Instruction Attributes---------------------------------------------
4574 4492 ins_attrib ins_cost(100); // Required cost attribute
4575 4493 ins_attrib ins_size(8); // Required size attribute (in bits)
4576 4494 ins_attrib ins_pc_relative(0); // Required PC Relative flag
4577 4495 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4578 4496 // non-matching short branch variant of some
4579 4497 // long branch?
4580 4498 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
4581 4499 // specifies the alignment that some part of the instruction (not
4582 4500 // necessarily the start) requires. If > 1, a compute_padding()
4583 4501 // function must be provided for the instruction
4584 4502
4585 4503 //----------OPERANDS-----------------------------------------------------------
4586 4504 // Operand definitions must precede instruction definitions for correct parsing
4587 4505 // in the ADLC because operands constitute user defined types which are used in
4588 4506 // instruction definitions.
4589 4507
4590 4508 //----------Simple Operands----------------------------------------------------
4591 4509 // Immediate Operands
4592 4510 // Integer Immediate
4593 4511 operand immI() %{
4594 4512 match(ConI);
4595 4513
4596 4514 op_cost(10);
4597 4515 format %{ %}
4598 4516 interface(CONST_INTER);
4599 4517 %}
4600 4518
4601 4519 // Constant for test vs zero
4602 4520 operand immI0() %{
4603 4521 predicate(n->get_int() == 0);
4604 4522 match(ConI);
4605 4523
4606 4524 op_cost(0);
4607 4525 format %{ %}
4608 4526 interface(CONST_INTER);
4609 4527 %}
4610 4528
4611 4529 // Constant for increment
4612 4530 operand immI1() %{
4613 4531 predicate(n->get_int() == 1);
4614 4532 match(ConI);
4615 4533
4616 4534 op_cost(0);
4617 4535 format %{ %}
4618 4536 interface(CONST_INTER);
4619 4537 %}
4620 4538
4621 4539 // Constant for decrement
4622 4540 operand immI_M1() %{
4623 4541 predicate(n->get_int() == -1);
4624 4542 match(ConI);
4625 4543
4626 4544 op_cost(0);
4627 4545 format %{ %}
4628 4546 interface(CONST_INTER);
4629 4547 %}
4630 4548
4631 4549 // Valid scale values for addressing modes
4632 4550 operand immI2() %{
4633 4551 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4634 4552 match(ConI);
4635 4553
4636 4554 format %{ %}
4637 4555 interface(CONST_INTER);
4638 4556 %}
4639 4557
4640 4558 operand immI8() %{
4641 4559 predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4642 4560 match(ConI);
4643 4561
4644 4562 op_cost(5);
4645 4563 format %{ %}
4646 4564 interface(CONST_INTER);
4647 4565 %}
4648 4566
4649 4567 operand immI16() %{
4650 4568 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4651 4569 match(ConI);
4652 4570
4653 4571 op_cost(10);
4654 4572 format %{ %}
4655 4573 interface(CONST_INTER);
4656 4574 %}
4657 4575
4658 4576 // Constant for long shifts
4659 4577 operand immI_32() %{
4660 4578 predicate( n->get_int() == 32 );
4661 4579 match(ConI);
4662 4580
4663 4581 op_cost(0);
4664 4582 format %{ %}
4665 4583 interface(CONST_INTER);
4666 4584 %}
4667 4585
4668 4586 operand immI_1_31() %{
4669 4587 predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4670 4588 match(ConI);
4671 4589
4672 4590 op_cost(0);
4673 4591 format %{ %}
4674 4592 interface(CONST_INTER);
4675 4593 %}
4676 4594
4677 4595 operand immI_32_63() %{
4678 4596 predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4679 4597 match(ConI);
4680 4598 op_cost(0);
4681 4599
4682 4600 format %{ %}
4683 4601 interface(CONST_INTER);
4684 4602 %}
4685 4603
4686 4604 operand immI_1() %{
4687 4605 predicate( n->get_int() == 1 );
4688 4606 match(ConI);
4689 4607
4690 4608 op_cost(0);
4691 4609 format %{ %}
4692 4610 interface(CONST_INTER);
4693 4611 %}
4694 4612
4695 4613 operand immI_2() %{
4696 4614 predicate( n->get_int() == 2 );
4697 4615 match(ConI);
4698 4616
4699 4617 op_cost(0);
4700 4618 format %{ %}
4701 4619 interface(CONST_INTER);
4702 4620 %}
4703 4621
4704 4622 operand immI_3() %{
4705 4623 predicate( n->get_int() == 3 );
4706 4624 match(ConI);
4707 4625
4708 4626 op_cost(0);
4709 4627 format %{ %}
4710 4628 interface(CONST_INTER);
4711 4629 %}
4712 4630
4713 4631 // Pointer Immediate
4714 4632 operand immP() %{
4715 4633 match(ConP);
4716 4634
4717 4635 op_cost(10);
4718 4636 format %{ %}
4719 4637 interface(CONST_INTER);
4720 4638 %}
4721 4639
4722 4640 // NULL Pointer Immediate
4723 4641 operand immP0() %{
4724 4642 predicate( n->get_ptr() == 0 );
4725 4643 match(ConP);
4726 4644 op_cost(0);
4727 4645
4728 4646 format %{ %}
4729 4647 interface(CONST_INTER);
4730 4648 %}
4731 4649
4732 4650 // Long Immediate
4733 4651 operand immL() %{
4734 4652 match(ConL);
4735 4653
4736 4654 op_cost(20);
4737 4655 format %{ %}
4738 4656 interface(CONST_INTER);
4739 4657 %}
4740 4658
4741 4659 // Long Immediate zero
4742 4660 operand immL0() %{
4743 4661 predicate( n->get_long() == 0L );
4744 4662 match(ConL);
4745 4663 op_cost(0);
4746 4664
4747 4665 format %{ %}
4748 4666 interface(CONST_INTER);
4749 4667 %}
4750 4668
4751 4669 // Long Immediate minus one
4752 4670 operand immL_M1() %{
4753 4671 predicate( n->get_long() == -1L );
4754 4672 match(ConL);
4755 4673 op_cost(0);
4756 4674
4757 4675 format %{ %}
4758 4676 interface(CONST_INTER);
4759 4677 %}
4760 4678
4761 4679 // Long immediate from 0 to 127.
4762 4680 // Used for a shorter form of long mul by 10.
4763 4681 operand immL_127() %{
4764 4682 predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4765 4683 match(ConL);
4766 4684 op_cost(0);
4767 4685
4768 4686 format %{ %}
4769 4687 interface(CONST_INTER);
4770 4688 %}
4771 4689
4772 4690 // Long Immediate: low 32-bit mask
4773 4691 operand immL_32bits() %{
4774 4692 predicate(n->get_long() == 0xFFFFFFFFL);
4775 4693 match(ConL);
4776 4694 op_cost(0);
4777 4695
4778 4696 format %{ %}
4779 4697 interface(CONST_INTER);
4780 4698 %}
4781 4699
4782 4700 // Long Immediate: value fits in signed 32 bits
4783 4701 operand immL32() %{
4784 4702 predicate(n->get_long() == (int)(n->get_long()));
4785 4703 match(ConL);
4786 4704 op_cost(20);
4787 4705
4788 4706 format %{ %}
4789 4707 interface(CONST_INTER);
4790 4708 %}
4791 4709
4792 4710 // Double Immediate zero
4793 4711 operand immD0() %{
↓ open down ↓ |
2684 lines elided |
↑ open up ↑ |
4794 4712 // Do additional (and counter-intuitive) test against NaN to work around VC++
4795 4713 // bug that generates code such that NaNs compare equal to 0.0
4796 4714 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4797 4715 match(ConD);
4798 4716
4799 4717 op_cost(5);
4800 4718 format %{ %}
4801 4719 interface(CONST_INTER);
4802 4720 %}
4803 4721
4804 -// Double Immediate
4722 +// Double Immediate one
4805 4723 operand immD1() %{
4806 4724 predicate( UseSSE<=1 && n->getd() == 1.0 );
4807 4725 match(ConD);
4808 4726
4809 4727 op_cost(5);
4810 4728 format %{ %}
4811 4729 interface(CONST_INTER);
4812 4730 %}
4813 4731
4814 4732 // Double Immediate
4815 4733 operand immD() %{
4816 4734 predicate(UseSSE<=1);
4817 4735 match(ConD);
4818 4736
4819 4737 op_cost(5);
4820 4738 format %{ %}
4821 4739 interface(CONST_INTER);
4822 4740 %}
4823 4741
4824 4742 operand immXD() %{
4825 4743 predicate(UseSSE>=2);
4826 4744 match(ConD);
4827 4745
4828 4746 op_cost(5);
4829 4747 format %{ %}
4830 4748 interface(CONST_INTER);
4831 4749 %}
4832 4750
4833 4751 // Double Immediate zero
4834 4752 operand immXD0() %{
4835 4753 // Do additional (and counter-intuitive) test against NaN to work around VC++
4836 4754 // bug that generates code such that NaNs compare equal to 0.0 AND do not
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
4837 4755 // compare equal to -0.0.
4838 4756 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4839 4757 match(ConD);
4840 4758
4841 4759 format %{ %}
4842 4760 interface(CONST_INTER);
4843 4761 %}
4844 4762
4845 4763 // Float Immediate zero
4846 4764 operand immF0() %{
4847 - predicate( UseSSE == 0 && n->getf() == 0.0 );
4765 + predicate(UseSSE == 0 && n->getf() == 0.0F);
4766 + match(ConF);
4767 +
4768 + op_cost(5);
4769 + format %{ %}
4770 + interface(CONST_INTER);
4771 +%}
4772 +
4773 +// Float Immediate one
4774 +operand immF1() %{
4775 + predicate(UseSSE == 0 && n->getf() == 1.0F);
4848 4776 match(ConF);
4849 4777
4850 4778 op_cost(5);
4851 4779 format %{ %}
4852 4780 interface(CONST_INTER);
4853 4781 %}
4854 4782
4855 4783 // Float Immediate
4856 4784 operand immF() %{
4857 4785 predicate( UseSSE == 0 );
4858 4786 match(ConF);
4859 4787
4860 4788 op_cost(5);
4861 4789 format %{ %}
4862 4790 interface(CONST_INTER);
4863 4791 %}
4864 4792
4865 4793 // Float Immediate
4866 4794 operand immXF() %{
4867 4795 predicate(UseSSE >= 1);
4868 4796 match(ConF);
4869 4797
4870 4798 op_cost(5);
4871 4799 format %{ %}
4872 4800 interface(CONST_INTER);
4873 4801 %}
4874 4802
4875 4803 // Float Immediate zero. Zero and not -0.0
4876 4804 operand immXF0() %{
4877 4805 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4878 4806 match(ConF);
4879 4807
4880 4808 op_cost(5);
4881 4809 format %{ %}
4882 4810 interface(CONST_INTER);
4883 4811 %}
4884 4812
4885 4813 // Immediates for special shifts (sign extend)
4886 4814
4887 4815 // Constants for increment
4888 4816 operand immI_16() %{
4889 4817 predicate( n->get_int() == 16 );
4890 4818 match(ConI);
4891 4819
4892 4820 format %{ %}
4893 4821 interface(CONST_INTER);
4894 4822 %}
4895 4823
4896 4824 operand immI_24() %{
4897 4825 predicate( n->get_int() == 24 );
4898 4826 match(ConI);
4899 4827
4900 4828 format %{ %}
4901 4829 interface(CONST_INTER);
4902 4830 %}
4903 4831
4904 4832 // Constant for byte-wide masking
4905 4833 operand immI_255() %{
4906 4834 predicate( n->get_int() == 255 );
4907 4835 match(ConI);
4908 4836
4909 4837 format %{ %}
4910 4838 interface(CONST_INTER);
4911 4839 %}
4912 4840
4913 4841 // Constant for short-wide masking
4914 4842 operand immI_65535() %{
4915 4843 predicate(n->get_int() == 65535);
4916 4844 match(ConI);
4917 4845
4918 4846 format %{ %}
4919 4847 interface(CONST_INTER);
4920 4848 %}
4921 4849
4922 4850 // Register Operands
4923 4851 // Integer Register
4924 4852 operand eRegI() %{
4925 4853 constraint(ALLOC_IN_RC(e_reg));
4926 4854 match(RegI);
4927 4855 match(xRegI);
4928 4856 match(eAXRegI);
4929 4857 match(eBXRegI);
4930 4858 match(eCXRegI);
4931 4859 match(eDXRegI);
4932 4860 match(eDIRegI);
4933 4861 match(eSIRegI);
4934 4862
4935 4863 format %{ %}
4936 4864 interface(REG_INTER);
4937 4865 %}
4938 4866
4939 4867 // Subset of Integer Register
4940 4868 operand xRegI(eRegI reg) %{
4941 4869 constraint(ALLOC_IN_RC(x_reg));
4942 4870 match(reg);
4943 4871 match(eAXRegI);
4944 4872 match(eBXRegI);
4945 4873 match(eCXRegI);
4946 4874 match(eDXRegI);
4947 4875
4948 4876 format %{ %}
4949 4877 interface(REG_INTER);
4950 4878 %}
4951 4879
4952 4880 // Special Registers
4953 4881 operand eAXRegI(xRegI reg) %{
4954 4882 constraint(ALLOC_IN_RC(eax_reg));
4955 4883 match(reg);
4956 4884 match(eRegI);
4957 4885
4958 4886 format %{ "EAX" %}
4959 4887 interface(REG_INTER);
4960 4888 %}
4961 4889
4962 4890 // Special Registers
4963 4891 operand eBXRegI(xRegI reg) %{
4964 4892 constraint(ALLOC_IN_RC(ebx_reg));
4965 4893 match(reg);
4966 4894 match(eRegI);
4967 4895
4968 4896 format %{ "EBX" %}
4969 4897 interface(REG_INTER);
4970 4898 %}
4971 4899
4972 4900 operand eCXRegI(xRegI reg) %{
4973 4901 constraint(ALLOC_IN_RC(ecx_reg));
4974 4902 match(reg);
4975 4903 match(eRegI);
4976 4904
4977 4905 format %{ "ECX" %}
4978 4906 interface(REG_INTER);
4979 4907 %}
4980 4908
4981 4909 operand eDXRegI(xRegI reg) %{
4982 4910 constraint(ALLOC_IN_RC(edx_reg));
4983 4911 match(reg);
4984 4912 match(eRegI);
4985 4913
4986 4914 format %{ "EDX" %}
4987 4915 interface(REG_INTER);
4988 4916 %}
4989 4917
4990 4918 operand eDIRegI(xRegI reg) %{
4991 4919 constraint(ALLOC_IN_RC(edi_reg));
4992 4920 match(reg);
4993 4921 match(eRegI);
4994 4922
4995 4923 format %{ "EDI" %}
4996 4924 interface(REG_INTER);
4997 4925 %}
4998 4926
4999 4927 operand naxRegI() %{
5000 4928 constraint(ALLOC_IN_RC(nax_reg));
5001 4929 match(RegI);
5002 4930 match(eCXRegI);
5003 4931 match(eDXRegI);
5004 4932 match(eSIRegI);
5005 4933 match(eDIRegI);
5006 4934
5007 4935 format %{ %}
5008 4936 interface(REG_INTER);
5009 4937 %}
5010 4938
5011 4939 operand nadxRegI() %{
5012 4940 constraint(ALLOC_IN_RC(nadx_reg));
5013 4941 match(RegI);
5014 4942 match(eBXRegI);
5015 4943 match(eCXRegI);
5016 4944 match(eSIRegI);
5017 4945 match(eDIRegI);
5018 4946
5019 4947 format %{ %}
5020 4948 interface(REG_INTER);
5021 4949 %}
5022 4950
5023 4951 operand ncxRegI() %{
5024 4952 constraint(ALLOC_IN_RC(ncx_reg));
5025 4953 match(RegI);
5026 4954 match(eAXRegI);
5027 4955 match(eDXRegI);
5028 4956 match(eSIRegI);
5029 4957 match(eDIRegI);
5030 4958
5031 4959 format %{ %}
5032 4960 interface(REG_INTER);
5033 4961 %}
5034 4962
5035 4963 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
5036 4964 // //
5037 4965 operand eSIRegI(xRegI reg) %{
5038 4966 constraint(ALLOC_IN_RC(esi_reg));
5039 4967 match(reg);
5040 4968 match(eRegI);
5041 4969
5042 4970 format %{ "ESI" %}
5043 4971 interface(REG_INTER);
5044 4972 %}
5045 4973
5046 4974 // Pointer Register
5047 4975 operand anyRegP() %{
5048 4976 constraint(ALLOC_IN_RC(any_reg));
5049 4977 match(RegP);
5050 4978 match(eAXRegP);
5051 4979 match(eBXRegP);
5052 4980 match(eCXRegP);
5053 4981 match(eDIRegP);
5054 4982 match(eRegP);
5055 4983
5056 4984 format %{ %}
5057 4985 interface(REG_INTER);
5058 4986 %}
5059 4987
5060 4988 operand eRegP() %{
5061 4989 constraint(ALLOC_IN_RC(e_reg));
5062 4990 match(RegP);
5063 4991 match(eAXRegP);
5064 4992 match(eBXRegP);
5065 4993 match(eCXRegP);
5066 4994 match(eDIRegP);
5067 4995
5068 4996 format %{ %}
5069 4997 interface(REG_INTER);
5070 4998 %}
5071 4999
5072 5000 // On windows95, EBP is not safe to use for implicit null tests.
5073 5001 operand eRegP_no_EBP() %{
5074 5002 constraint(ALLOC_IN_RC(e_reg_no_rbp));
5075 5003 match(RegP);
5076 5004 match(eAXRegP);
5077 5005 match(eBXRegP);
5078 5006 match(eCXRegP);
5079 5007 match(eDIRegP);
5080 5008
5081 5009 op_cost(100);
5082 5010 format %{ %}
5083 5011 interface(REG_INTER);
5084 5012 %}
5085 5013
5086 5014 operand naxRegP() %{
5087 5015 constraint(ALLOC_IN_RC(nax_reg));
5088 5016 match(RegP);
5089 5017 match(eBXRegP);
5090 5018 match(eDXRegP);
5091 5019 match(eCXRegP);
5092 5020 match(eSIRegP);
5093 5021 match(eDIRegP);
5094 5022
5095 5023 format %{ %}
5096 5024 interface(REG_INTER);
5097 5025 %}
5098 5026
5099 5027 operand nabxRegP() %{
5100 5028 constraint(ALLOC_IN_RC(nabx_reg));
5101 5029 match(RegP);
5102 5030 match(eCXRegP);
5103 5031 match(eDXRegP);
5104 5032 match(eSIRegP);
5105 5033 match(eDIRegP);
5106 5034
5107 5035 format %{ %}
5108 5036 interface(REG_INTER);
5109 5037 %}
5110 5038
5111 5039 operand pRegP() %{
5112 5040 constraint(ALLOC_IN_RC(p_reg));
5113 5041 match(RegP);
5114 5042 match(eBXRegP);
5115 5043 match(eDXRegP);
5116 5044 match(eSIRegP);
5117 5045 match(eDIRegP);
5118 5046
5119 5047 format %{ %}
5120 5048 interface(REG_INTER);
5121 5049 %}
5122 5050
5123 5051 // Special Registers
5124 5052 // Return a pointer value
5125 5053 operand eAXRegP(eRegP reg) %{
5126 5054 constraint(ALLOC_IN_RC(eax_reg));
5127 5055 match(reg);
5128 5056 format %{ "EAX" %}
5129 5057 interface(REG_INTER);
5130 5058 %}
5131 5059
5132 5060 // Used in AtomicAdd
5133 5061 operand eBXRegP(eRegP reg) %{
5134 5062 constraint(ALLOC_IN_RC(ebx_reg));
5135 5063 match(reg);
5136 5064 format %{ "EBX" %}
5137 5065 interface(REG_INTER);
5138 5066 %}
5139 5067
5140 5068 // Tail-call (interprocedural jump) to interpreter
5141 5069 operand eCXRegP(eRegP reg) %{
5142 5070 constraint(ALLOC_IN_RC(ecx_reg));
5143 5071 match(reg);
5144 5072 format %{ "ECX" %}
5145 5073 interface(REG_INTER);
5146 5074 %}
5147 5075
5148 5076 operand eSIRegP(eRegP reg) %{
5149 5077 constraint(ALLOC_IN_RC(esi_reg));
5150 5078 match(reg);
5151 5079 format %{ "ESI" %}
5152 5080 interface(REG_INTER);
5153 5081 %}
5154 5082
5155 5083 // Used in rep stosw
5156 5084 operand eDIRegP(eRegP reg) %{
5157 5085 constraint(ALLOC_IN_RC(edi_reg));
5158 5086 match(reg);
5159 5087 format %{ "EDI" %}
5160 5088 interface(REG_INTER);
5161 5089 %}
5162 5090
5163 5091 operand eBPRegP() %{
5164 5092 constraint(ALLOC_IN_RC(ebp_reg));
5165 5093 match(RegP);
5166 5094 format %{ "EBP" %}
5167 5095 interface(REG_INTER);
5168 5096 %}
5169 5097
5170 5098 operand eRegL() %{
5171 5099 constraint(ALLOC_IN_RC(long_reg));
5172 5100 match(RegL);
5173 5101 match(eADXRegL);
5174 5102
5175 5103 format %{ %}
5176 5104 interface(REG_INTER);
5177 5105 %}
5178 5106
5179 5107 operand eADXRegL( eRegL reg ) %{
5180 5108 constraint(ALLOC_IN_RC(eadx_reg));
5181 5109 match(reg);
5182 5110
5183 5111 format %{ "EDX:EAX" %}
5184 5112 interface(REG_INTER);
5185 5113 %}
5186 5114
5187 5115 operand eBCXRegL( eRegL reg ) %{
5188 5116 constraint(ALLOC_IN_RC(ebcx_reg));
5189 5117 match(reg);
5190 5118
5191 5119 format %{ "EBX:ECX" %}
5192 5120 interface(REG_INTER);
5193 5121 %}
5194 5122
5195 5123 // Special case for integer high multiply
5196 5124 operand eADXRegL_low_only() %{
5197 5125 constraint(ALLOC_IN_RC(eadx_reg));
5198 5126 match(RegL);
5199 5127
5200 5128 format %{ "EAX" %}
5201 5129 interface(REG_INTER);
5202 5130 %}
5203 5131
5204 5132 // Flags register, used as output of compare instructions
5205 5133 operand eFlagsReg() %{
5206 5134 constraint(ALLOC_IN_RC(int_flags));
5207 5135 match(RegFlags);
5208 5136
5209 5137 format %{ "EFLAGS" %}
5210 5138 interface(REG_INTER);
5211 5139 %}
5212 5140
5213 5141 // Flags register, used as output of FLOATING POINT compare instructions
5214 5142 operand eFlagsRegU() %{
5215 5143 constraint(ALLOC_IN_RC(int_flags));
5216 5144 match(RegFlags);
5217 5145
5218 5146 format %{ "EFLAGS_U" %}
5219 5147 interface(REG_INTER);
5220 5148 %}
5221 5149
5222 5150 operand eFlagsRegUCF() %{
5223 5151 constraint(ALLOC_IN_RC(int_flags));
5224 5152 match(RegFlags);
5225 5153 predicate(false);
5226 5154
5227 5155 format %{ "EFLAGS_U_CF" %}
5228 5156 interface(REG_INTER);
5229 5157 %}
5230 5158
5231 5159 // Condition Code Register used by long compare
5232 5160 operand flagsReg_long_LTGE() %{
5233 5161 constraint(ALLOC_IN_RC(int_flags));
5234 5162 match(RegFlags);
5235 5163 format %{ "FLAGS_LTGE" %}
5236 5164 interface(REG_INTER);
5237 5165 %}
5238 5166 operand flagsReg_long_EQNE() %{
5239 5167 constraint(ALLOC_IN_RC(int_flags));
5240 5168 match(RegFlags);
5241 5169 format %{ "FLAGS_EQNE" %}
5242 5170 interface(REG_INTER);
5243 5171 %}
5244 5172 operand flagsReg_long_LEGT() %{
5245 5173 constraint(ALLOC_IN_RC(int_flags));
5246 5174 match(RegFlags);
5247 5175 format %{ "FLAGS_LEGT" %}
5248 5176 interface(REG_INTER);
5249 5177 %}
5250 5178
5251 5179 // Float register operands
5252 5180 operand regD() %{
5253 5181 predicate( UseSSE < 2 );
5254 5182 constraint(ALLOC_IN_RC(dbl_reg));
5255 5183 match(RegD);
5256 5184 match(regDPR1);
5257 5185 match(regDPR2);
5258 5186 format %{ %}
5259 5187 interface(REG_INTER);
5260 5188 %}
5261 5189
5262 5190 operand regDPR1(regD reg) %{
5263 5191 predicate( UseSSE < 2 );
5264 5192 constraint(ALLOC_IN_RC(dbl_reg0));
5265 5193 match(reg);
5266 5194 format %{ "FPR1" %}
5267 5195 interface(REG_INTER);
5268 5196 %}
5269 5197
5270 5198 operand regDPR2(regD reg) %{
5271 5199 predicate( UseSSE < 2 );
5272 5200 constraint(ALLOC_IN_RC(dbl_reg1));
5273 5201 match(reg);
5274 5202 format %{ "FPR2" %}
5275 5203 interface(REG_INTER);
5276 5204 %}
5277 5205
5278 5206 operand regnotDPR1(regD reg) %{
5279 5207 predicate( UseSSE < 2 );
5280 5208 constraint(ALLOC_IN_RC(dbl_notreg0));
5281 5209 match(reg);
5282 5210 format %{ %}
5283 5211 interface(REG_INTER);
5284 5212 %}
5285 5213
5286 5214 // XMM Double register operands
5287 5215 operand regXD() %{
5288 5216 predicate( UseSSE>=2 );
5289 5217 constraint(ALLOC_IN_RC(xdb_reg));
5290 5218 match(RegD);
5291 5219 match(regXD6);
5292 5220 match(regXD7);
5293 5221 format %{ %}
5294 5222 interface(REG_INTER);
5295 5223 %}
5296 5224
5297 5225 // XMM6 double register operands
5298 5226 operand regXD6(regXD reg) %{
5299 5227 predicate( UseSSE>=2 );
5300 5228 constraint(ALLOC_IN_RC(xdb_reg6));
5301 5229 match(reg);
5302 5230 format %{ "XMM6" %}
5303 5231 interface(REG_INTER);
5304 5232 %}
5305 5233
5306 5234 // XMM7 double register operands
5307 5235 operand regXD7(regXD reg) %{
5308 5236 predicate( UseSSE>=2 );
5309 5237 constraint(ALLOC_IN_RC(xdb_reg7));
5310 5238 match(reg);
5311 5239 format %{ "XMM7" %}
5312 5240 interface(REG_INTER);
5313 5241 %}
5314 5242
5315 5243 // Float register operands
5316 5244 operand regF() %{
5317 5245 predicate( UseSSE < 2 );
5318 5246 constraint(ALLOC_IN_RC(flt_reg));
5319 5247 match(RegF);
5320 5248 match(regFPR1);
5321 5249 format %{ %}
5322 5250 interface(REG_INTER);
5323 5251 %}
5324 5252
5325 5253 // Float register operands
5326 5254 operand regFPR1(regF reg) %{
5327 5255 predicate( UseSSE < 2 );
5328 5256 constraint(ALLOC_IN_RC(flt_reg0));
5329 5257 match(reg);
5330 5258 format %{ "FPR1" %}
5331 5259 interface(REG_INTER);
5332 5260 %}
5333 5261
5334 5262 // XMM register operands
5335 5263 operand regX() %{
5336 5264 predicate( UseSSE>=1 );
5337 5265 constraint(ALLOC_IN_RC(xmm_reg));
5338 5266 match(RegF);
5339 5267 format %{ %}
5340 5268 interface(REG_INTER);
5341 5269 %}
5342 5270
5343 5271
5344 5272 //----------Memory Operands----------------------------------------------------
5345 5273 // Direct Memory Operand
5346 5274 operand direct(immP addr) %{
5347 5275 match(addr);
5348 5276
5349 5277 format %{ "[$addr]" %}
5350 5278 interface(MEMORY_INTER) %{
5351 5279 base(0xFFFFFFFF);
5352 5280 index(0x4);
5353 5281 scale(0x0);
5354 5282 disp($addr);
5355 5283 %}
5356 5284 %}
5357 5285
5358 5286 // Indirect Memory Operand
5359 5287 operand indirect(eRegP reg) %{
5360 5288 constraint(ALLOC_IN_RC(e_reg));
5361 5289 match(reg);
5362 5290
5363 5291 format %{ "[$reg]" %}
5364 5292 interface(MEMORY_INTER) %{
5365 5293 base($reg);
5366 5294 index(0x4);
5367 5295 scale(0x0);
5368 5296 disp(0x0);
5369 5297 %}
5370 5298 %}
5371 5299
5372 5300 // Indirect Memory Plus Short Offset Operand
5373 5301 operand indOffset8(eRegP reg, immI8 off) %{
5374 5302 match(AddP reg off);
5375 5303
5376 5304 format %{ "[$reg + $off]" %}
5377 5305 interface(MEMORY_INTER) %{
5378 5306 base($reg);
5379 5307 index(0x4);
5380 5308 scale(0x0);
5381 5309 disp($off);
5382 5310 %}
5383 5311 %}
5384 5312
5385 5313 // Indirect Memory Plus Long Offset Operand
5386 5314 operand indOffset32(eRegP reg, immI off) %{
5387 5315 match(AddP reg off);
5388 5316
5389 5317 format %{ "[$reg + $off]" %}
5390 5318 interface(MEMORY_INTER) %{
5391 5319 base($reg);
5392 5320 index(0x4);
5393 5321 scale(0x0);
5394 5322 disp($off);
5395 5323 %}
5396 5324 %}
5397 5325
5398 5326 // Indirect Memory Plus Long Offset Operand
5399 5327 operand indOffset32X(eRegI reg, immP off) %{
5400 5328 match(AddP off reg);
5401 5329
5402 5330 format %{ "[$reg + $off]" %}
5403 5331 interface(MEMORY_INTER) %{
5404 5332 base($reg);
5405 5333 index(0x4);
5406 5334 scale(0x0);
5407 5335 disp($off);
5408 5336 %}
5409 5337 %}
5410 5338
5411 5339 // Indirect Memory Plus Index Register Plus Offset Operand
5412 5340 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5413 5341 match(AddP (AddP reg ireg) off);
5414 5342
5415 5343 op_cost(10);
5416 5344 format %{"[$reg + $off + $ireg]" %}
5417 5345 interface(MEMORY_INTER) %{
5418 5346 base($reg);
5419 5347 index($ireg);
5420 5348 scale(0x0);
5421 5349 disp($off);
5422 5350 %}
5423 5351 %}
5424 5352
5425 5353 // Indirect Memory Plus Index Register Plus Offset Operand
5426 5354 operand indIndex(eRegP reg, eRegI ireg) %{
5427 5355 match(AddP reg ireg);
5428 5356
5429 5357 op_cost(10);
5430 5358 format %{"[$reg + $ireg]" %}
5431 5359 interface(MEMORY_INTER) %{
5432 5360 base($reg);
5433 5361 index($ireg);
5434 5362 scale(0x0);
5435 5363 disp(0x0);
5436 5364 %}
5437 5365 %}
5438 5366
5439 5367 // // -------------------------------------------------------------------------
5440 5368 // // 486 architecture doesn't support "scale * index + offset" without a base
5441 5369 // // -------------------------------------------------------------------------
5442 5370 // // Scaled Memory Operands
5443 5371 // // Indirect Memory Times Scale Plus Offset Operand
5444 5372 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5445 5373 // match(AddP off (LShiftI ireg scale));
5446 5374 //
5447 5375 // op_cost(10);
5448 5376 // format %{"[$off + $ireg << $scale]" %}
5449 5377 // interface(MEMORY_INTER) %{
5450 5378 // base(0x4);
5451 5379 // index($ireg);
5452 5380 // scale($scale);
5453 5381 // disp($off);
5454 5382 // %}
5455 5383 // %}
5456 5384
5457 5385 // Indirect Memory Times Scale Plus Index Register
5458 5386 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5459 5387 match(AddP reg (LShiftI ireg scale));
5460 5388
5461 5389 op_cost(10);
5462 5390 format %{"[$reg + $ireg << $scale]" %}
5463 5391 interface(MEMORY_INTER) %{
5464 5392 base($reg);
5465 5393 index($ireg);
5466 5394 scale($scale);
5467 5395 disp(0x0);
5468 5396 %}
5469 5397 %}
5470 5398
5471 5399 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5472 5400 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5473 5401 match(AddP (AddP reg (LShiftI ireg scale)) off);
5474 5402
5475 5403 op_cost(10);
5476 5404 format %{"[$reg + $off + $ireg << $scale]" %}
5477 5405 interface(MEMORY_INTER) %{
5478 5406 base($reg);
5479 5407 index($ireg);
5480 5408 scale($scale);
5481 5409 disp($off);
5482 5410 %}
5483 5411 %}
5484 5412
5485 5413 //----------Load Long Memory Operands------------------------------------------
5486 5414 // The load-long idiom will use its address expression again after loading
5487 5415 // the first word of the long. If the load-long destination overlaps with
5488 5416 // registers used in the addressing expression, the 2nd half will be loaded
5489 5417 // from a clobbered address. Fix this by requiring that load-long use
5490 5418 // address registers that do not overlap with the load-long target.
5491 5419
5492 5420 // load-long support
// Base-pointer operand restricted to ESI (ALLOC_IN_RC(esi_reg)) so the
// address register cannot overlap the load-long destination pair; see the
// clobbering discussion in the section comment above.
5493 5421 operand load_long_RegP() %{
5494 5422 constraint(ALLOC_IN_RC(esi_reg));
5495 5423 match(RegP);
5496 5424 match(eSIRegP);
5497 5425 op_cost(100);
5498 5426 format %{ %}
5499 5427 interface(REG_INTER);
5500 5428 %}
5501 5429
5502 5430 // Indirect Memory Operand Long
// Plain [reg] form of the load-long address; reg is pinned to ESI.
// index(0x4) is the "no index register" encoding used throughout this file.
5503 5431 operand load_long_indirect(load_long_RegP reg) %{
5504 5432 constraint(ALLOC_IN_RC(esi_reg));
5505 5433 match(reg);
5506 5434
5507 5435 format %{ "[$reg]" %}
5508 5436 interface(MEMORY_INTER) %{
5509 5437 base($reg);
5510 5438 index(0x4);
5511 5439 scale(0x0);
5512 5440 disp(0x0);
5513 5441 %}
5514 5442 %}
5515 5443
5516 5444 // Indirect Memory Plus Long Offset Operand
// [reg + off] form of the load-long address; reg is still pinned to ESI.
5517 5445 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5518 5446 match(AddP reg off);
5519 5447
5520 5448 format %{ "[$reg + $off]" %}
5521 5449 interface(MEMORY_INTER) %{
5522 5450 base($reg);
5523 5451 index(0x4);
5524 5452 scale(0x0);
5525 5453 disp($off);
5526 5454 %}
5527 5455 %}
5528 5456
// Address forms legal for the two-instruction load-long idiom above.
5529 5457 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5530 5458
5531 5459
5532 5460 //----------Special Memory Operands--------------------------------------------
5533 5461 // Stack Slot Operand - This operand is used for loading and storing temporary
5534 5462 // values on the stack where a match requires a value to
5535 5463 // flow through memory.
// Pointer-sized stack slot addressed as [ESP + disp]; matcher-internal only.
5536 5464 operand stackSlotP(sRegP reg) %{
5537 5465 constraint(ALLOC_IN_RC(stack_slots));
5538 5466 // No match rule because this operand is only generated in matching
5539 5467 format %{ "[$reg]" %}
5540 5468 interface(MEMORY_INTER) %{
5541 5469 base(0x4); // ESP
5542 5470 index(0x4); // No Index
5543 5471 scale(0x0); // No Scale
5544 5472 disp($reg); // Stack Offset
5545 5473 %}
5546 5474 %}
5547 5475
// Integer stack slot addressed as [ESP + disp]; matcher-internal only.
5548 5476 operand stackSlotI(sRegI reg) %{
5549 5477 constraint(ALLOC_IN_RC(stack_slots));
5550 5478 // No match rule because this operand is only generated in matching
5551 5479 format %{ "[$reg]" %}
5552 5480 interface(MEMORY_INTER) %{
5553 5481 base(0x4); // ESP
5554 5482 index(0x4); // No Index
5555 5483 scale(0x0); // No Scale
5556 5484 disp($reg); // Stack Offset
5557 5485 %}
5558 5486 %}
5559 5487
// Float stack slot addressed as [ESP + disp]; matcher-internal only.
5560 5488 operand stackSlotF(sRegF reg) %{
5561 5489 constraint(ALLOC_IN_RC(stack_slots));
5562 5490 // No match rule because this operand is only generated in matching
5563 5491 format %{ "[$reg]" %}
5564 5492 interface(MEMORY_INTER) %{
5565 5493 base(0x4); // ESP
5566 5494 index(0x4); // No Index
5567 5495 scale(0x0); // No Scale
5568 5496 disp($reg); // Stack Offset
5569 5497 %}
5570 5498 %}
5571 5499
// Double stack slot addressed as [ESP + disp]; matcher-internal only.
5572 5500 operand stackSlotD(sRegD reg) %{
5573 5501 constraint(ALLOC_IN_RC(stack_slots));
5574 5502 // No match rule because this operand is only generated in matching
5575 5503 format %{ "[$reg]" %}
5576 5504 interface(MEMORY_INTER) %{
5577 5505 base(0x4); // ESP
5578 5506 index(0x4); // No Index
5579 5507 scale(0x0); // No Scale
5580 5508 disp($reg); // Stack Offset
5581 5509 %}
5582 5510 %}
5583 5511
// Long stack slot addressed as [ESP + disp]; matcher-internal only.
5584 5512 operand stackSlotL(sRegL reg) %{
5585 5513 constraint(ALLOC_IN_RC(stack_slots));
5586 5514 // No match rule because this operand is only generated in matching
5587 5515 format %{ "[$reg]" %}
5588 5516 interface(MEMORY_INTER) %{
5589 5517 base(0x4); // ESP
5590 5518 index(0x4); // No Index
5591 5519 scale(0x0); // No Scale
5592 5520 disp($reg); // Stack Offset
5593 5521 %}
5594 5522 %}
5595 5523
5596 5524 //----------Memory Operands - Win95 Implicit Null Variants----------------
5597 5525 // Indirect Memory Operand
// Win95 implicit-null-check variant: base register type eRegP_no_EBP
// excludes EBP (see the section header above).
5598 5526 operand indirect_win95_safe(eRegP_no_EBP reg)
5599 5527 %{
5600 5528 constraint(ALLOC_IN_RC(e_reg));
5601 5529 match(reg);
5602 5530
5603 5531 op_cost(100);
5604 5532 format %{ "[$reg]" %}
5605 5533 interface(MEMORY_INTER) %{
5606 5534 base($reg);
5607 5535 index(0x4);
5608 5536 scale(0x0);
5609 5537 disp(0x0);
5610 5538 %}
5611 5539 %}
5612 5540
5613 5541 // Indirect Memory Plus Short Offset Operand
// Win95-safe variant of indOffset8: base excludes EBP (eRegP_no_EBP).
5614 5542 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5615 5543 %{
5616 5544 match(AddP reg off);
5617 5545
5618 5546 op_cost(100);
5619 5547 format %{ "[$reg + $off]" %}
5620 5548 interface(MEMORY_INTER) %{
5621 5549 base($reg);
5622 5550 index(0x4);
5623 5551 scale(0x0);
5624 5552 disp($off);
5625 5553 %}
5626 5554 %}
5627 5555
5628 5556 // Indirect Memory Plus Long Offset Operand
// Win95-safe variant of indOffset32: base excludes EBP (eRegP_no_EBP).
5629 5557 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5630 5558 %{
5631 5559 match(AddP reg off);
5632 5560
5633 5561 op_cost(100);
5634 5562 format %{ "[$reg + $off]" %}
5635 5563 interface(MEMORY_INTER) %{
5636 5564 base($reg);
5637 5565 index(0x4);
5638 5566 scale(0x0);
5639 5567 disp($off);
5640 5568 %}
5641 5569 %}
5642 5570
5643 5571 // Indirect Memory Plus Index Register Plus Offset Operand
// Win95-safe variant of indIndexOffset: base excludes EBP (eRegP_no_EBP).
5644 5572 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5645 5573 %{
5646 5574 match(AddP (AddP reg ireg) off);
5647 5575
5648 5576 op_cost(100);
5649 5577 format %{"[$reg + $off + $ireg]" %}
5650 5578 interface(MEMORY_INTER) %{
5651 5579 base($reg);
5652 5580 index($ireg);
5653 5581 scale(0x0);
5654 5582 disp($off);
5655 5583 %}
5656 5584 %}
5657 5585
5658 5586 // Indirect Memory Times Scale Plus Index Register
// Win95-safe variant of indIndexScale: base excludes EBP (eRegP_no_EBP).
5659 5587 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5660 5588 %{
5661 5589 match(AddP reg (LShiftI ireg scale));
5662 5590
5663 5591 op_cost(100);
5664 5592 format %{"[$reg + $ireg << $scale]" %}
5665 5593 interface(MEMORY_INTER) %{
5666 5594 base($reg);
5667 5595 index($ireg);
5668 5596 scale($scale);
5669 5597 disp(0x0);
5670 5598 %}
5671 5599 %}
5672 5600
5673 5601 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Win95-safe variant of indIndexScaleOffset: base excludes EBP (eRegP_no_EBP).
5674 5602 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5675 5603 %{
5676 5604 match(AddP (AddP reg (LShiftI ireg scale)) off);
5677 5605
5678 5606 op_cost(100);
5679 5607 format %{"[$reg + $off + $ireg << $scale]" %}
5680 5608 interface(MEMORY_INTER) %{
5681 5609 base($reg);
5682 5610 index($ireg);
5683 5611 scale($scale);
5684 5612 disp($off);
5685 5613 %}
5686 5614 %}
5687 5615
5688 5616 //----------Conditional Branch Operands----------------------------------------
5689 5617 // Comparison Op - This is the operation of the comparison, and is limited to
5690 5618 // the following set of codes:
5691 5619 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5692 5620 //
5693 5621 // Other attributes of the comparison, such as unsignedness, are specified
5694 5622 // by the comparison instruction that sets a condition code flags register.
5695 5623 // That result is represented by a flags operand whose subtype is appropriate
5696 5624 // to the unsignedness (etc.) of the comparison.
5697 5625 //
5698 5626 // Later, the instruction which matches both the Comparison Op (a Bool) and
5699 5627 // the flags (produced by the Cmp) specifies the coding of the comparison op
5700 5628 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5701 5629
5702 5630 // Comparison Code
// Signed comparison: each entry pairs a condition-code encoding with the
// jcc mnemonic suffix used for signed compares (l/ge/le/g).
5703 5631 operand cmpOp() %{
5704 5632 match(Bool);
5705 5633
5706 5634 format %{ "" %}
5707 5635 interface(COND_INTER) %{
5708 5636 equal(0x4, "e");
5709 5637 not_equal(0x5, "ne");
5710 5638 less(0xC, "l");
5711 5639 greater_equal(0xD, "ge");
5712 5640 less_equal(0xE, "le");
5713 5641 greater(0xF, "g");
5714 5642 %}
5715 5643 %}
5716 5644
5717 5645 // Comparison Code, unsigned compare. Used by FP also, with
5718 5646 // C2 (unordered) turned into GT or LT already. The other bits
5719 5647 // C0 and C3 are turned into Carry & Zero flags.
// Uses the unsigned jcc suffixes (b/nb/be/nbe) instead of the signed ones.
5720 5648 operand cmpOpU() %{
5721 5649 match(Bool);
5722 5650
5723 5651 format %{ "" %}
5724 5652 interface(COND_INTER) %{
5725 5653 equal(0x4, "e");
5726 5654 not_equal(0x5, "ne");
5727 5655 less(0x2, "b");
5728 5656 greater_equal(0x3, "nb");
5729 5657 less_equal(0x6, "be");
5730 5658 greater(0x7, "nbe");
5731 5659 %}
5732 5660 %}
5733 5661
5734 5662 // Floating comparisons that don't require any fixup for the unordered case
// Restricted by predicate to lt/ge/le/gt tests; uses the same unsigned
// encodings as cmpOpU.
5735 5663 operand cmpOpUCF() %{
5736 5664 match(Bool);
5737 5665 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5738 5666 n->as_Bool()->_test._test == BoolTest::ge ||
5739 5667 n->as_Bool()->_test._test == BoolTest::le ||
5740 5668 n->as_Bool()->_test._test == BoolTest::gt);
5741 5669 format %{ "" %}
5742 5670 interface(COND_INTER) %{
5743 5671 equal(0x4, "e");
5744 5672 not_equal(0x5, "ne");
5745 5673 less(0x2, "b");
5746 5674 greater_equal(0x3, "nb");
5747 5675 less_equal(0x6, "be");
5748 5676 greater(0x7, "nbe");
5749 5677 %}
5750 5678 %}
5751 5679
5752 5680
5753 5681 // Floating comparisons that can be fixed up with extra conditional jumps
// Restricted by predicate to eq/ne tests; same unsigned encodings as cmpOpU.
5754 5682 operand cmpOpUCF2() %{
5755 5683 match(Bool);
5756 5684 predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5757 5685 n->as_Bool()->_test._test == BoolTest::eq);
5758 5686 format %{ "" %}
5759 5687 interface(COND_INTER) %{
5760 5688 equal(0x4, "e");
5761 5689 not_equal(0x5, "ne");
5762 5690 less(0x2, "b");
5763 5691 greater_equal(0x3, "nb");
5764 5692 less_equal(0x6, "be");
5765 5693 greater(0x7, "nbe");
5766 5694 %}
5767 5695 %}
5768 5696
5769 5697 // Comparison Code for FP conditional move
// NOTE(review): these wider hex values are FCMOV-specific encodings, not the
// jcc nibbles used by the operands above — confirm against the FCMOV encoder.
5770 5698 operand cmpOp_fcmov() %{
5771 5699 match(Bool);
5772 5700
5773 5701 format %{ "" %}
5774 5702 interface(COND_INTER) %{
5775 5703 equal (0x0C8);
5776 5704 not_equal (0x1C8);
5777 5705 less (0x0C0);
5778 5706 greater_equal(0x1C0);
5779 5707 less_equal (0x0D0);
5780 5708 greater (0x1D0);
5781 5709 %}
5782 5710 %}
5783 5711
5784 5712 // Comparison Code used in long compares
// The less/greater entries are deliberately the mirror image of cmpOp's
// (0xF/"g" for less, etc.): the comparison operands have been commuted,
// so the condition sense must be reversed.
5785 5713 operand cmpOp_commute() %{
5786 5714 match(Bool);
5787 5715
5788 5716 format %{ "" %}
5789 5717 interface(COND_INTER) %{
5790 5718 equal(0x4, "e");
5791 5719 not_equal(0x5, "ne");
5792 5720 less(0xF, "g");
5793 5721 greater_equal(0xE, "le");
5794 5722 less_equal(0xD, "ge");
5795 5723 greater(0xC, "l");
5796 5724 %}
5797 5725 %}
5798 5726
5799 5727 //----------OPERAND CLASSES----------------------------------------------------
5800 5728 // Operand Classes are groups of operands that are used to simplify
5801 5729 // instruction definitions by not requiring the AD writer to specify separate
5802 5730 // instructions for every form of operand when the instruction accepts
5803 5731 // multiple operand types with the same basic encoding and format. The classic
5804 5732 // case of this is memory operands.
5805 5733
// All general addressing modes accepted by ordinary memory instructions.
5806 5734 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5807 5735 indIndex, indIndexScale, indIndexScaleOffset);
5808 5736
5809 5737 // Long memory operations are encoded in 2 instructions and a +4 offset.
5810 5738 // This means some kind of offset is always required and you cannot use
5811 5739 // an oop as the offset (done when working on static globals).
// Note: identical to 'memory' above except that indOffset32X is excluded.
5812 5740 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5813 5741 indIndex, indIndexScale, indIndexScaleOffset);
5814 5742
5815 5743
5816 5744 //----------PIPELINE-----------------------------------------------------------
5817 5745 // Rules which define the behavior of the target architectures pipeline.
5818 5746 pipeline %{
5819 5747
5820 5748 //----------ATTRIBUTES---------------------------------------------------------
5821 5749 attributes %{
5822 5750 variable_size_instructions; // Instructions are variable length
5823 5751 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
5824 5752 instruction_unit_size = 1; // An instruction is 1 byte long
5825 5753 instruction_fetch_unit_size = 16; // The processor fetches one line
5826 5754 instruction_fetch_units = 1; // of 16 bytes
5827 5755
5828 5756 // List of nop instructions
5829 5757 nops( MachNop );
5830 5758 %}
5831 5759
5832 5760 //----------RESOURCES----------------------------------------------------------
5833 5761 // Resources are the functional units available to the machine
5834 5762
5835 5763 // Generic P2/P3 pipeline
5836 5764 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5837 5765 // 3 instructions decoded per cycle.
5838 5766 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5839 5767 // 2 ALU op, only ALU0 handles mul/div instructions.
5840 5768 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5841 5769 MS0, MS1, MEM = MS0 | MS1,
5842 5770 BR, FPU,
5843 5771 ALU0, ALU1, ALU = ALU0 | ALU1 );
5844 5772
5845 5773 //----------PIPELINE DESCRIPTION-----------------------------------------------
5846 5774 // Pipeline Description specifies the stages in the machine's pipeline
5847 5775
5848 5776 // Generic P2/P3 pipeline
5849 5777 pipe_desc(S0, S1, S2, S3, S4, S5);
5850 5778
5851 5779 //----------PIPELINE CLASSES---------------------------------------------------
5852 5780 // Pipeline Classes describe the stages in which input and output are
5853 5781 // referenced by the hardware pipeline.
5854 5782
5855 5783 // Naming convention: ialu or fpu
5856 5784 // Then: _reg
5857 5785 // Then: _reg if there is a 2nd register
5858 5786 // Then: _long if it's a pair of instructions implementing a long
5859 5787 // Then: _fat if it requires the big decoder
5860 5788 // Or: _mem if it requires the big decoder and a memory unit.
5861 5789
5862 5790 // Integer ALU reg operation
5863 5791 pipe_class ialu_reg(eRegI dst) %{
5864 5792 single_instruction;
5865 5793 dst : S4(write);
5866 5794 dst : S3(read);
5867 5795 DECODE : S0; // any decoder
5868 5796 ALU : S3; // any alu
5869 5797 %}
5870 5798
5871 5799 // Long ALU reg operation
5872 5800 pipe_class ialu_reg_long(eRegL dst) %{
5873 5801 instruction_count(2);
5874 5802 dst : S4(write);
5875 5803 dst : S3(read);
5876 5804 DECODE : S0(2); // any 2 decoders
5877 5805 ALU : S3(2); // both alus
5878 5806 %}
5879 5807
5880 5808 // Integer ALU reg operation using big decoder
5881 5809 pipe_class ialu_reg_fat(eRegI dst) %{
5882 5810 single_instruction;
5883 5811 dst : S4(write);
5884 5812 dst : S3(read);
5885 5813 D0 : S0; // big decoder only
5886 5814 ALU : S3; // any alu
5887 5815 %}
5888 5816
5889 5817 // Long ALU reg operation using big decoder
5890 5818 pipe_class ialu_reg_long_fat(eRegL dst) %{
5891 5819 instruction_count(2);
5892 5820 dst : S4(write);
5893 5821 dst : S3(read);
5894 5822 D0 : S0(2); // big decoder only; twice
5895 5823 ALU : S3(2); // any 2 alus
5896 5824 %}
5897 5825
5898 5826 // Integer ALU reg-reg operation
5899 5827 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5900 5828 single_instruction;
5901 5829 dst : S4(write);
5902 5830 src : S3(read);
5903 5831 DECODE : S0; // any decoder
5904 5832 ALU : S3; // any alu
5905 5833 %}
5906 5834
5907 5835 // Long ALU reg-reg operation
5908 5836 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5909 5837 instruction_count(2);
5910 5838 dst : S4(write);
5911 5839 src : S3(read);
5912 5840 DECODE : S0(2); // any 2 decoders
5913 5841 ALU : S3(2); // both alus
5914 5842 %}
5915 5843
5916 5844 // Integer ALU reg-reg operation
5917 5845 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5918 5846 single_instruction;
5919 5847 dst : S4(write);
5920 5848 src : S3(read);
5921 5849 D0 : S0; // big decoder only
5922 5850 ALU : S3; // any alu
5923 5851 %}
5924 5852
5925 5853 // Long ALU reg-reg operation
5926 5854 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5927 5855 instruction_count(2);
5928 5856 dst : S4(write);
5929 5857 src : S3(read);
5930 5858 D0 : S0(2); // big decoder only; twice
5931 5859 ALU : S3(2); // both alus
5932 5860 %}
5933 5861
5934 5862 // Integer ALU reg-mem operation
5935 5863 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5936 5864 single_instruction;
5937 5865 dst : S5(write);
5938 5866 mem : S3(read);
5939 5867 D0 : S0; // big decoder only
5940 5868 ALU : S4; // any alu
5941 5869 MEM : S3; // any mem
5942 5870 %}
5943 5871
5944 5872 // Long ALU reg-mem operation
5945 5873 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5946 5874 instruction_count(2);
5947 5875 dst : S5(write);
5948 5876 mem : S3(read);
5949 5877 D0 : S0(2); // big decoder only; twice
5950 5878 ALU : S4(2); // any 2 alus
5951 5879 MEM : S3(2); // both mems
5952 5880 %}
5953 5881
5954 5882 // Integer mem operation (prefetch)
5955 5883 pipe_class ialu_mem(memory mem)
5956 5884 %{
5957 5885 single_instruction;
5958 5886 mem : S3(read);
5959 5887 D0 : S0; // big decoder only
5960 5888 MEM : S3; // any mem
5961 5889 %}
5962 5890
5963 5891 // Integer Store to Memory
5964 5892 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5965 5893 single_instruction;
5966 5894 mem : S3(read);
5967 5895 src : S5(read);
5968 5896 D0 : S0; // big decoder only
5969 5897 ALU : S4; // any alu
5970 5898 MEM : S3;
5971 5899 %}
5972 5900
5973 5901 // Long Store to Memory
5974 5902 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5975 5903 instruction_count(2);
5976 5904 mem : S3(read);
5977 5905 src : S5(read);
5978 5906 D0 : S0(2); // big decoder only; twice
5979 5907 ALU : S4(2); // any 2 alus
5980 5908 MEM : S3(2); // Both mems
5981 5909 %}
5982 5910
5983 5911 // Integer Store to Memory
5984 5912 pipe_class ialu_mem_imm(memory mem) %{
5985 5913 single_instruction;
5986 5914 mem : S3(read);
5987 5915 D0 : S0; // big decoder only
5988 5916 ALU : S4; // any alu
5989 5917 MEM : S3;
5990 5918 %}
5991 5919
5992 5920 // Integer ALU0 reg-reg operation
5993 5921 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5994 5922 single_instruction;
5995 5923 dst : S4(write);
5996 5924 src : S3(read);
5997 5925 D0 : S0; // Big decoder only
5998 5926 ALU0 : S3; // only alu0
5999 5927 %}
6000 5928
6001 5929 // Integer ALU0 reg-mem operation
6002 5930 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
6003 5931 single_instruction;
6004 5932 dst : S5(write);
6005 5933 mem : S3(read);
6006 5934 D0 : S0; // big decoder only
6007 5935 ALU0 : S4; // ALU0 only
6008 5936 MEM : S3; // any mem
6009 5937 %}
6010 5938
6011 5939 // Integer ALU reg-reg operation
6012 5940 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
6013 5941 single_instruction;
6014 5942 cr : S4(write);
6015 5943 src1 : S3(read);
6016 5944 src2 : S3(read);
6017 5945 DECODE : S0; // any decoder
6018 5946 ALU : S3; // any alu
6019 5947 %}
6020 5948
6021 5949 // Integer ALU reg-imm operation
6022 5950 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
6023 5951 single_instruction;
6024 5952 cr : S4(write);
6025 5953 src1 : S3(read);
6026 5954 DECODE : S0; // any decoder
6027 5955 ALU : S3; // any alu
6028 5956 %}
6029 5957
6030 5958 // Integer ALU reg-mem operation
6031 5959 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
6032 5960 single_instruction;
6033 5961 cr : S4(write);
6034 5962 src1 : S3(read);
6035 5963 src2 : S3(read);
6036 5964 D0 : S0; // big decoder only
6037 5965 ALU : S4; // any alu
6038 5966 MEM : S3;
6039 5967 %}
6040 5968
6041 5969 // Conditional move reg-reg
6042 5970 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
6043 5971 instruction_count(4);
6044 5972 y : S4(read);
6045 5973 q : S3(read);
6046 5974 p : S3(read);
6047 5975 DECODE : S0(4); // any decoder
6048 5976 %}
6049 5977
6050 5978 // Conditional move reg-reg
6051 5979 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
6052 5980 single_instruction;
6053 5981 dst : S4(write);
6054 5982 src : S3(read);
6055 5983 cr : S3(read);
6056 5984 DECODE : S0; // any decoder
6057 5985 %}
6058 5986
6059 5987 // Conditional move reg-mem
6060 5988 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
6061 5989 single_instruction;
6062 5990 dst : S4(write);
6063 5991 src : S3(read);
6064 5992 cr : S3(read);
6065 5993 DECODE : S0; // any decoder
6066 5994 MEM : S3;
6067 5995 %}
6068 5996
6069 5997 // Conditional move reg-reg long
6070 5998 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6071 5999 single_instruction;
6072 6000 dst : S4(write);
6073 6001 src : S3(read);
6074 6002 cr : S3(read);
6075 6003 DECODE : S0(2); // any 2 decoders
6076 6004 %}
6077 6005
6078 6006 // Conditional move double reg-reg
6079 6007 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6080 6008 single_instruction;
6081 6009 dst : S4(write);
6082 6010 src : S3(read);
6083 6011 cr : S3(read);
6084 6012 DECODE : S0; // any decoder
6085 6013 %}
6086 6014
6087 6015 // Float reg-reg operation
6088 6016 pipe_class fpu_reg(regD dst) %{
6089 6017 instruction_count(2);
6090 6018 dst : S3(read);
6091 6019 DECODE : S0(2); // any 2 decoders
6092 6020 FPU : S3;
6093 6021 %}
6094 6022
6095 6023 // Float reg-reg operation
6096 6024 pipe_class fpu_reg_reg(regD dst, regD src) %{
6097 6025 instruction_count(2);
6098 6026 dst : S4(write);
6099 6027 src : S3(read);
6100 6028 DECODE : S0(2); // any 2 decoders
6101 6029 FPU : S3;
6102 6030 %}
6103 6031
6104 6032 // Float reg-reg operation
6105 6033 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6106 6034 instruction_count(3);
6107 6035 dst : S4(write);
6108 6036 src1 : S3(read);
6109 6037 src2 : S3(read);
6110 6038 DECODE : S0(3); // any 3 decoders
6111 6039 FPU : S3(2);
6112 6040 %}
6113 6041
6114 6042 // Float reg-reg operation
6115 6043 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6116 6044 instruction_count(4);
6117 6045 dst : S4(write);
6118 6046 src1 : S3(read);
6119 6047 src2 : S3(read);
6120 6048 src3 : S3(read);
6121 6049 DECODE : S0(4); // any 4 decoders
6122 6050 FPU : S3(2);
6123 6051 %}
6124 6052
6125 6053 // Float reg-reg operation
6126 6054 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6127 6055 instruction_count(4);
6128 6056 dst : S4(write);
6129 6057 src1 : S3(read);
6130 6058 src2 : S3(read);
6131 6059 src3 : S3(read);
6132 6060 DECODE : S1(3); // any 3 decoders
6133 6061 D0 : S0; // Big decoder only
6134 6062 FPU : S3(2);
6135 6063 MEM : S3;
6136 6064 %}
6137 6065
6138 6066 // Float reg-mem operation
6139 6067 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6140 6068 instruction_count(2);
6141 6069 dst : S5(write);
6142 6070 mem : S3(read);
6143 6071 D0 : S0; // big decoder only
6144 6072 DECODE : S1; // any decoder for FPU POP
6145 6073 FPU : S4;
6146 6074 MEM : S3; // any mem
6147 6075 %}
6148 6076
6149 6077 // Float reg-mem operation
6150 6078 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6151 6079 instruction_count(3);
6152 6080 dst : S5(write);
6153 6081 src1 : S3(read);
6154 6082 mem : S3(read);
6155 6083 D0 : S0; // big decoder only
6156 6084 DECODE : S1(2); // any decoder for FPU POP
6157 6085 FPU : S4;
6158 6086 MEM : S3; // any mem
6159 6087 %}
6160 6088
6161 6089 // Float mem-reg operation
6162 6090 pipe_class fpu_mem_reg(memory mem, regD src) %{
6163 6091 instruction_count(2);
6164 6092 src : S5(read);
6165 6093 mem : S3(read);
6166 6094 DECODE : S0; // any decoder for FPU PUSH
6167 6095 D0 : S1; // big decoder only
6168 6096 FPU : S4;
6169 6097 MEM : S3; // any mem
6170 6098 %}
6171 6099
6172 6100 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6173 6101 instruction_count(3);
6174 6102 src1 : S3(read);
6175 6103 src2 : S3(read);
6176 6104 mem : S3(read);
6177 6105 DECODE : S0(2); // any decoder for FPU PUSH
6178 6106 D0 : S1; // big decoder only
6179 6107 FPU : S4;
6180 6108 MEM : S3; // any mem
6181 6109 %}
6182 6110
6183 6111 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6184 6112 instruction_count(3);
6185 6113 src1 : S3(read);
6186 6114 src2 : S3(read);
6187 6115 mem : S4(read);
6188 6116 DECODE : S0; // any decoder for FPU PUSH
6189 6117 D0 : S0(2); // big decoder only
6190 6118 FPU : S4;
6191 6119 MEM : S3(2); // any mem
6192 6120 %}
6193 6121
6194 6122 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6195 6123 instruction_count(2);
6196 6124 src1 : S3(read);
6197 6125 dst : S4(read);
6198 6126 D0 : S0(2); // big decoder only
6199 6127 MEM : S3(2); // any mem
6200 6128 %}
6201 6129
6202 6130 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6203 6131 instruction_count(3);
6204 6132 src1 : S3(read);
6205 6133 src2 : S3(read);
6206 6134 dst : S4(read);
6207 6135 D0 : S0(3); // big decoder only
6208 6136 FPU : S4;
6209 6137 MEM : S3(3); // any mem
6210 6138 %}
6211 6139
6212 6140 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6213 6141 instruction_count(3);
6214 6142 src1 : S4(read);
6215 6143 mem : S4(read);
6216 6144 DECODE : S0; // any decoder for FPU PUSH
6217 6145 D0 : S0(2); // big decoder only
6218 6146 FPU : S4;
6219 6147 MEM : S3(2); // any mem
6220 6148 %}
6221 6149
6222 6150 // Float load constant
6223 6151 pipe_class fpu_reg_con(regD dst) %{
6224 6152 instruction_count(2);
6225 6153 dst : S5(write);
6226 6154 D0 : S0; // big decoder only for the load
6227 6155 DECODE : S1; // any decoder for FPU POP
6228 6156 FPU : S4;
6229 6157 MEM : S3; // any mem
6230 6158 %}
6231 6159
6232 6160 // Float load constant
6233 6161 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6234 6162 instruction_count(3);
6235 6163 dst : S5(write);
6236 6164 src : S3(read);
6237 6165 D0 : S0; // big decoder only for the load
6238 6166 DECODE : S1(2); // any decoder for FPU POP
6239 6167 FPU : S4;
6240 6168 MEM : S3; // any mem
6241 6169 %}
6242 6170
6243 6171 // UnConditional branch
6244 6172 pipe_class pipe_jmp( label labl ) %{
6245 6173 single_instruction;
6246 6174 BR : S3;
6247 6175 %}
6248 6176
6249 6177 // Conditional branch
6250 6178 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6251 6179 single_instruction;
6252 6180 cr : S1(read);
6253 6181 BR : S3;
6254 6182 %}
6255 6183
6256 6184 // Allocation idiom
6257 6185 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6258 6186 instruction_count(1); force_serialization;
6259 6187 fixed_latency(6);
6260 6188 heap_ptr : S3(read);
6261 6189 DECODE : S0(3);
6262 6190 D0 : S2;
6263 6191 MEM : S3;
6264 6192 ALU : S3(2);
6265 6193 dst : S5(write);
6266 6194 BR : S5;
6267 6195 %}
6268 6196
6269 6197 // Generic big/slow expanded idiom
6270 6198 pipe_class pipe_slow( ) %{
6271 6199 instruction_count(10); multiple_bundles; force_serialization;
6272 6200 fixed_latency(100);
6273 6201 D0 : S0(2);
6274 6202 MEM : S3(2);
6275 6203 %}
6276 6204
6277 6205 // The real do-nothing guy
6278 6206 pipe_class empty( ) %{
6279 6207 instruction_count(0);
6280 6208 %}
6281 6209
6282 6210 // Define the class for the Nop node
6283 6211 define %{
6284 6212 MachNop = empty;
6285 6213 %}
6286 6214
6287 6215 %}
6288 6216
6289 6217 //----------INSTRUCTIONS-------------------------------------------------------
6290 6218 //
6291 6219 // match -- States which machine-independent subtree may be replaced
6292 6220 // by this instruction.
6293 6221 // ins_cost -- The estimated cost of this instruction is used by instruction
6294 6222 // selection to identify a minimum cost tree of machine
6295 6223 // instructions that matches a tree of machine-independent
6296 6224 // instructions.
6297 6225 // format -- A string providing the disassembly for this instruction.
6298 6226 // The value of an instruction's operand may be inserted
6299 6227 // by referring to it with a '$' prefix.
6300 6228 // opcode -- Three instruction opcodes may be provided. These are referred
6301 6229 // to within an encode class as $primary, $secondary, and $tertiary
6302 6230 // respectively. The primary opcode is commonly used to
6303 6231 // indicate the type of machine instruction, while secondary
6304 6232 // and tertiary are often used for prefix options or addressing
6305 6233 // modes.
6306 6234 // ins_encode -- A list of encode classes with parameters. The encode class
6307 6235 // name must have been defined in an 'enc_class' specification
6308 6236 // in the encode section of the architecture description.
6309 6237
6310 6238 //----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit int in place with BSWAP (0F C8+reg).
6311 6239 instruct bytes_reverse_int(eRegI dst) %{
6312 6240 match(Set dst (ReverseBytesI dst));
6313 6241
6314 6242 format %{ "BSWAP $dst" %}
6315 6243 opcode(0x0F, 0xC8);
6316 6244 ins_encode( OpcP, OpcSReg(dst) );
6317 6245 ins_pipe( ialu_reg );
6318 6246 %}
6319 6247
// Reverse the byte order of a long: BSWAP each 32-bit half, then exchange
// the halves so the low/high words trade places.
6320 6248 instruct bytes_reverse_long(eRegL dst) %{
6321 6249 match(Set dst (ReverseBytesL dst));
6322 6250
6323 6251 format %{ "BSWAP $dst.lo\n\t"
6324 6252 "BSWAP $dst.hi\n\t"
6325 6253 "XCHG $dst.lo $dst.hi" %}
6326 6254
6327 6255 ins_cost(125);
6328 6256 ins_encode( bswap_long_bytes(dst) );
6329 6257 ins_pipe( ialu_reg_reg);
6330 6258 %}
6331 6259
// Reverse the two low bytes: BSWAP moves them to the top half, then a
// logical shift right by 16 brings them back down zero-extended.
6332 6260 instruct bytes_reverse_unsigned_short(eRegI dst) %{
6333 6261 match(Set dst (ReverseBytesUS dst));
6334 6262
6335 6263 format %{ "BSWAP $dst\n\t"
6336 6264 "SHR $dst,16\n\t" %}
6337 6265 ins_encode %{
6338 6266 __ bswapl($dst$$Register);
6339 6267 __ shrl($dst$$Register, 16);
6340 6268 %}
6341 6269 ins_pipe( ialu_reg );
6342 6270 %}
6343 6271
// Same idea as the unsigned variant above, but the arithmetic shift (SAR)
// sign-extends the reversed 16-bit value instead of zero-extending it.
6344 6272 instruct bytes_reverse_short(eRegI dst) %{
6345 6273 match(Set dst (ReverseBytesS dst));
6346 6274
6347 6275 format %{ "BSWAP $dst\n\t"
6348 6276 "SAR $dst,16\n\t" %}
6349 6277 ins_encode %{
6350 6278 __ bswapl($dst$$Register);
6351 6279 __ sarl($dst$$Register, 16);
6352 6280 %}
6353 6281 ins_pipe( ialu_reg );
6354 6282 %}
6355 6283
6356 6284
6357 6285 //---------- Zeros Count Instructions ------------------------------------------
6358 6286
// Count leading zeros with a single LZCNT; selected only when the CPU
// supports it (UseCountLeadingZerosInstruction predicate).
6359 6287 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6360 6288 predicate(UseCountLeadingZerosInstruction);
6361 6289 match(Set dst (CountLeadingZerosI src));
6362 6290 effect(KILL cr);
6363 6291
6364 6292 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
6365 6293 ins_encode %{
6366 6294 __ lzcntl($dst$$Register, $src$$Register);
6367 6295 %}
6368 6296 ins_pipe(ialu_reg);
6369 6297 %}
6370 6298
// Fallback when LZCNT is unavailable: computes 31 - BSR(src).
// BSR sets ZF and leaves dst undefined for src == 0, so that case is
// patched to -1, which the final NEG/ADD sequence turns into 32.
6371 6299 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6372 6300 predicate(!UseCountLeadingZerosInstruction);
6373 6301 match(Set dst (CountLeadingZerosI src));
6374 6302 effect(KILL cr);
6375 6303
6376 6304 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
6377 6305 "JNZ skip\n\t"
6378 6306 "MOV $dst, -1\n"
6379 6307 "skip:\n\t"
6380 6308 "NEG $dst\n\t"
6381 6309 "ADD $dst, 31" %}
6382 6310 ins_encode %{
6383 6311 Register Rdst = $dst$$Register;
6384 6312 Register Rsrc = $src$$Register;
6385 6313 Label skip;
6386 6314 __ bsrl(Rdst, Rsrc);
6387 6315 __ jccb(Assembler::notZero, skip);
6388 6316 __ movl(Rdst, -1);
6389 6317 __ bind(skip);
6390 6318 __ negl(Rdst);
6391 6319 __ addl(Rdst, BitsPerInt - 1);
6392 6320 %}
6393 6321 ins_pipe(ialu_reg);
6394 6322 %}
6395 6323
// LZCNT sets the carry flag when its source is all zeros, so JNC (carryClear)
// means the high word supplied the count; otherwise count the low word and
// add 32 for the zero high word.
6396 6324 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6397 6325 predicate(UseCountLeadingZerosInstruction);
6398 6326 match(Set dst (CountLeadingZerosL src));
6399 6327 effect(TEMP dst, KILL cr);
6400 6328
6401 6329 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
6402 6330 "JNC done\n\t"
6403 6331 "LZCNT $dst, $src.lo\n\t"
6404 6332 "ADD $dst, 32\n"
6405 6333 "done:" %}
6406 6334 ins_encode %{
6407 6335 Register Rdst = $dst$$Register;
6408 6336 Register Rsrc = $src$$Register;
6409 6337 Label done;
6410 6338 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6411 6339 __ jccb(Assembler::carryClear, done);
6412 6340 __ lzcntl(Rdst, Rsrc);
6413 6341 __ addl(Rdst, BitsPerInt);
6414 6342 __ bind(done);
6415 6343 %}
6416 6344 ins_pipe(ialu_reg);
6417 6345 %}
6418 6346
// BSR fallback for longs: computes 63 - (index of highest set bit).
// The high word's bit index is biased by 32 before the NEG/ADD 63 finish;
// a fully-zero source falls through both BSR tests to the -1 path,
// yielding 64.
6419 6347 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6420 6348 predicate(!UseCountLeadingZerosInstruction);
6421 6349 match(Set dst (CountLeadingZerosL src));
6422 6350 effect(TEMP dst, KILL cr);
6423 6351
6424 6352 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
6425 6353 "JZ msw_is_zero\n\t"
6426 6354 "ADD $dst, 32\n\t"
6427 6355 "JMP not_zero\n"
6428 6356 "msw_is_zero:\n\t"
6429 6357 "BSR $dst, $src.lo\n\t"
6430 6358 "JNZ not_zero\n\t"
6431 6359 "MOV $dst, -1\n"
6432 6360 "not_zero:\n\t"
6433 6361 "NEG $dst\n\t"
6434 6362 "ADD $dst, 63\n" %}
6435 6363 ins_encode %{
6436 6364 Register Rdst = $dst$$Register;
6437 6365 Register Rsrc = $src$$Register;
6438 6366 Label msw_is_zero;
6439 6367 Label not_zero;
6440 6368 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6441 6369 __ jccb(Assembler::zero, msw_is_zero);
6442 6370 __ addl(Rdst, BitsPerInt);
6443 6371 __ jmpb(not_zero);
6444 6372 __ bind(msw_is_zero);
6445 6373 __ bsrl(Rdst, Rsrc);
6446 6374 __ jccb(Assembler::notZero, not_zero);
6447 6375 __ movl(Rdst, -1);
6448 6376 __ bind(not_zero);
6449 6377 __ negl(Rdst);
6450 6378 __ addl(Rdst, BitsPerLong - 1);
6451 6379 %}
6452 6380 ins_pipe(ialu_reg);
6453 6381 %}
6454 6382
// BSF finds the lowest set bit, which is directly the trailing-zero count;
// like BSR it leaves dst undefined (ZF set) for a zero source, hence the
// explicit MOV dst,32 for that case.
6455 6383 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6456 6384 match(Set dst (CountTrailingZerosI src));
6457 6385 effect(KILL cr);
6458 6386
6459 6387 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
6460 6388 "JNZ done\n\t"
6461 6389 "MOV $dst, 32\n"
6462 6390 "done:" %}
6463 6391 ins_encode %{
6464 6392 Register Rdst = $dst$$Register;
6465 6393 Label done;
6466 6394 __ bsfl(Rdst, $src$$Register);
6467 6395 __ jccb(Assembler::notZero, done);
6468 6396 __ movl(Rdst, BitsPerInt);
6469 6397 __ bind(done);
6470 6398 %}
6471 6399 ins_pipe(ialu_reg);
6472 6400 %}
6473 6401
// Trailing zeros of a 64-bit value on 32-bit x86: scan the low word first;
// if it is zero, scan the high word and add 32.  If both are zero the
// MOV dst,32 followed by the shared ADD dst,32 yields 64.
6474 6402 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6475 6403 match(Set dst (CountTrailingZerosL src));
6476 6404 effect(TEMP dst, KILL cr);
6477 6405
6478 6406 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
6479 6407 "JNZ done\n\t"
6480 6408 "BSF $dst, $src.hi\n\t"
6481 6409 "JNZ msw_not_zero\n\t"
6482 6410 "MOV $dst, 32\n"
6483 6411 "msw_not_zero:\n\t"
6484 6412 "ADD $dst, 32\n"
6485 6413 "done:" %}
6486 6414 ins_encode %{
6487 6415 Register Rdst = $dst$$Register;
6488 6416 Register Rsrc = $src$$Register;
6489 6417 Label msw_not_zero;
6490 6418 Label done;
6491 6419 __ bsfl(Rdst, Rsrc);
6492 6420 __ jccb(Assembler::notZero, done);
6493 6421 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6494 6422 __ jccb(Assembler::notZero, msw_not_zero);
6495 6423 __ movl(Rdst, BitsPerInt);
6496 6424 __ bind(msw_not_zero);
6497 6425 __ addl(Rdst, BitsPerInt);
6498 6426 __ bind(done);
6499 6427 %}
6500 6428 ins_pipe(ialu_reg);
6501 6429 %}
6502 6430
6503 6431
6504 6432 //---------- Population Count Instructions -------------------------------------
6505 6433
// All four forms are guarded by UsePopCountInstruction (POPCNT is SSE4.2+).
// The long variants sum the popcounts of the two 32-bit halves; for the
// memory form the halves are addressed at disp and disp+4 via raw addresses.
6506 6434 instruct popCountI(eRegI dst, eRegI src) %{
6507 6435 predicate(UsePopCountInstruction);
6508 6436 match(Set dst (PopCountI src));
6509 6437
6510 6438 format %{ "POPCNT $dst, $src" %}
6511 6439 ins_encode %{
6512 6440 __ popcntl($dst$$Register, $src$$Register);
6513 6441 %}
6514 6442 ins_pipe(ialu_reg);
6515 6443 %}
6516 6444
6517 6445 instruct popCountI_mem(eRegI dst, memory mem) %{
6518 6446 predicate(UsePopCountInstruction);
6519 6447 match(Set dst (PopCountI (LoadI mem)));
6520 6448
6521 6449 format %{ "POPCNT $dst, $mem" %}
6522 6450 ins_encode %{
6523 6451 __ popcntl($dst$$Register, $mem$$Address);
6524 6452 %}
6525 6453 ins_pipe(ialu_reg);
6526 6454 %}
6527 6455
6528 6456 // Note: Long.bitCount(long) returns an int.
6529 6457 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6530 6458 predicate(UsePopCountInstruction);
6531 6459 match(Set dst (PopCountL src));
6532 6460 effect(KILL cr, TEMP tmp, TEMP dst);
6533 6461
6534 6462 format %{ "POPCNT $dst, $src.lo\n\t"
6535 6463 "POPCNT $tmp, $src.hi\n\t"
6536 6464 "ADD $dst, $tmp" %}
6537 6465 ins_encode %{
6538 6466 __ popcntl($dst$$Register, $src$$Register);
6539 6467 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6540 6468 __ addl($dst$$Register, $tmp$$Register);
6541 6469 %}
6542 6470 ins_pipe(ialu_reg);
6543 6471 %}
6544 6472
6545 6473 // Note: Long.bitCount(long) returns an int.
6546 6474 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6547 6475 predicate(UsePopCountInstruction);
6548 6476 match(Set dst (PopCountL (LoadL mem)));
6549 6477 effect(KILL cr, TEMP tmp, TEMP dst);
6550 6478
6551 6479 format %{ "POPCNT $dst, $mem\n\t"
6552 6480 "POPCNT $tmp, $mem+4\n\t"
6553 6481 "ADD $dst, $tmp" %}
6554 6482 ins_encode %{
6555 6483 //__ popcntl($dst$$Register, $mem$$Address$$first);
6556 6484 //__ popcntl($tmp$$Register, $mem$$Address$$second);
6557 6485 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6558 6486 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6559 6487 __ addl($dst$$Register, $tmp$$Register);
6560 6488 %}
6561 6489 ins_pipe(ialu_reg);
6562 6490 %}
6563 6491
6564 6492
6565 6493 //----------Load/Store/Move Instructions---------------------------------------
6566 6494 //----------Load Instructions--------------------------------------------------
6567 6495 // Load Byte (8bit signed)
6568 6496 instruct loadB(xRegI dst, memory mem) %{
6569 6497 match(Set dst (LoadB mem));
6570 6498
6571 6499 ins_cost(125);
6572 6500 format %{ "MOVSX8 $dst,$mem\t# byte" %}
6573 6501
6574 6502 ins_encode %{
6575 6503 __ movsbl($dst$$Register, $mem$$Address);
6576 6504 %}
6577 6505
6578 6506 ins_pipe(ialu_reg_mem);
6579 6507 %}
6580 6508
6581 6509 // Load Byte (8bit signed) into Long Register
// Sign-extend into the 64-bit pair: copy the sign-extended low word into
// the high register, then arithmetic-shift to replicate the sign bit.
6582 6510 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6583 6511 match(Set dst (ConvI2L (LoadB mem)));
6584 6512 effect(KILL cr);
6585 6513
6586 6514 ins_cost(375);
6587 6515 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6588 6516 "MOV $dst.hi,$dst.lo\n\t"
6589 6517 "SAR $dst.hi,7" %}
6590 6518
6591 6519 ins_encode %{
6592 6520 __ movsbl($dst$$Register, $mem$$Address);
6593 6521 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6594 6522 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6595 6523 %}
6596 6524
6597 6525 ins_pipe(ialu_reg_mem);
6598 6526 %}
6599 6527
6600 6528 // Load Unsigned Byte (8bit UNsigned)
6601 6529 instruct loadUB(xRegI dst, memory mem) %{
6602 6530 match(Set dst (LoadUB mem));
6603 6531
6604 6532 ins_cost(125);
6605 6533 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6606 6534
6607 6535 ins_encode %{
6608 6536 __ movzbl($dst$$Register, $mem$$Address);
6609 6537 %}
6610 6538
6611 6539 ins_pipe(ialu_reg_mem);
6612 6540 %}
6613 6541
6614 6542 // Load Unsigned Byte (8 bit UNsigned) into Long Register
// Unsigned widening: zero the high register instead of sign-propagating.
6615 6543 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6616 6544 match(Set dst (ConvI2L (LoadUB mem)));
6617 6545 effect(KILL cr);
6618 6546
6619 6547 ins_cost(250);
6620 6548 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6621 6549 "XOR $dst.hi,$dst.hi" %}
6622 6550
6623 6551 ins_encode %{
6624 6552 Register Rdst = $dst$$Register;
6625 6553 __ movzbl(Rdst, $mem$$Address);
6626 6554 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6627 6555 %}
6628 6556
6629 6557 ins_pipe(ialu_reg_mem);
6630 6558 %}
6631 6559
6632 6560 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
6633 6561 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6634 6562 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6635 6563 effect(KILL cr);
6636 6564
6637 6565 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6638 6566 "XOR $dst.hi,$dst.hi\n\t"
6639 6567 "AND $dst.lo,$mask" %}
6640 6568 ins_encode %{
6641 6569 Register Rdst = $dst$$Register;
6642 6570 __ movzbl(Rdst, $mem$$Address);
6643 6571 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6644 6572 __ andl(Rdst, $mask$$constant);
6645 6573 %}
6646 6574 ins_pipe(ialu_reg_mem);
6647 6575 %}
6648 6576
6649 6577 // Load Short (16bit signed)
6650 6578 instruct loadS(eRegI dst, memory mem) %{
6651 6579 match(Set dst (LoadS mem));
6652 6580
6653 6581 ins_cost(125);
6654 6582 format %{ "MOVSX $dst,$mem\t# short" %}
6655 6583
6656 6584 ins_encode %{
6657 6585 __ movswl($dst$$Register, $mem$$Address);
6658 6586 %}
6659 6587
6660 6588 ins_pipe(ialu_reg_mem);
6661 6589 %}
6662 6590
6663 6591 // Load Short (16 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 pattern is byte sign-extension; fold it into one MOVSX8.
6664 6592 instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6665 6593 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6666 6594
6667 6595 ins_cost(125);
6668 6596 format %{ "MOVSX $dst, $mem\t# short -> byte" %}
6669 6597 ins_encode %{
6670 6598 __ movsbl($dst$$Register, $mem$$Address);
6671 6599 %}
6672 6600 ins_pipe(ialu_reg_mem);
6673 6601 %}
6674 6602
6675 6603 // Load Short (16bit signed) into Long Register
6676 6604 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6677 6605 match(Set dst (ConvI2L (LoadS mem)));
6678 6606 effect(KILL cr);
6679 6607
6680 6608 ins_cost(375);
6681 6609 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
6682 6610 "MOV $dst.hi,$dst.lo\n\t"
6683 6611 "SAR $dst.hi,15" %}
6684 6612
6685 6613 ins_encode %{
6686 6614 __ movswl($dst$$Register, $mem$$Address);
6687 6615 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6688 6616 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
6689 6617 %}
6690 6618
6691 6619 ins_pipe(ialu_reg_mem);
6692 6620 %}
6693 6621
6694 6622 // Load Unsigned Short/Char (16bit unsigned)
6695 6623 instruct loadUS(eRegI dst, memory mem) %{
6696 6624 match(Set dst (LoadUS mem));
6697 6625
6698 6626 ins_cost(125);
6699 6627 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
6700 6628
6701 6629 ins_encode %{
6702 6630 __ movzwl($dst$$Register, $mem$$Address);
6703 6631 %}
6704 6632
6705 6633 ins_pipe(ialu_reg_mem);
6706 6634 %}
6707 6635
6708 6636 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6709 6637 instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6710 6638 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6711 6639
6712 6640 ins_cost(125);
6713 6641 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
6714 6642 ins_encode %{
6715 6643 __ movsbl($dst$$Register, $mem$$Address);
6716 6644 %}
6717 6645 ins_pipe(ialu_reg_mem);
6718 6646 %}
6719 6647
6720 6648 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6721 6649 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6722 6650 match(Set dst (ConvI2L (LoadUS mem)));
6723 6651 effect(KILL cr);
6724 6652
6725 6653 ins_cost(250);
6726 6654 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
6727 6655 "XOR $dst.hi,$dst.hi" %}
6728 6656
6729 6657 ins_encode %{
6730 6658 __ movzwl($dst$$Register, $mem$$Address);
6731 6659 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6732 6660 %}
6733 6661
6734 6662 ins_pipe(ialu_reg_mem);
6735 6663 %}
6736 6664
6737 6665 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Mask 0xFF makes the upper byte irrelevant, so a single byte zero-extend
// (movzbl) replaces the load+and sequence.
6738 6666 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6739 6667 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6740 6668 effect(KILL cr);
6741 6669
6742 6670 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6743 6671 "XOR $dst.hi,$dst.hi" %}
6744 6672 ins_encode %{
6745 6673 Register Rdst = $dst$$Register;
6746 6674 __ movzbl(Rdst, $mem$$Address);
6747 6675 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6748 6676 %}
6749 6677 ins_pipe(ialu_reg_mem);
6750 6678 %}
6751 6679
6752 6680 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
6753 6681 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6754 6682 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6755 6683 effect(KILL cr);
6756 6684
6757 6685 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6758 6686 "XOR $dst.hi,$dst.hi\n\t"
6759 6687 "AND $dst.lo,$mask" %}
6760 6688 ins_encode %{
6761 6689 Register Rdst = $dst$$Register;
6762 6690 __ movzwl(Rdst, $mem$$Address);
6763 6691 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6764 6692 __ andl(Rdst, $mask$$constant);
6765 6693 %}
6766 6694 ins_pipe(ialu_reg_mem);
6767 6695 %}
6768 6696
6769 6697 // Load Integer
6770 6698 instruct loadI(eRegI dst, memory mem) %{
6771 6699 match(Set dst (LoadI mem));
6772 6700
6773 6701 ins_cost(125);
6774 6702 format %{ "MOV $dst,$mem\t# int" %}
6775 6703
6776 6704 ins_encode %{
6777 6705 __ movl($dst$$Register, $mem$$Address);
6778 6706 %}
6779 6707
6780 6708 ins_pipe(ialu_reg_mem);
6781 6709 %}
6782 6710
6783 6711 // Load Integer (32 bit signed) to Byte (8 bit signed)
// The following narrow-load rules fold shift-pair / mask idioms that the
// parser emits for (byte)/(short)/(char) casts into a single MOVSX/MOVZX.
6784 6712 instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6785 6713 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6786 6714
6787 6715 ins_cost(125);
6788 6716 format %{ "MOVSX $dst, $mem\t# int -> byte" %}
6789 6717 ins_encode %{
6790 6718 __ movsbl($dst$$Register, $mem$$Address);
6791 6719 %}
6792 6720 ins_pipe(ialu_reg_mem);
6793 6721 %}
6794 6722
6795 6723 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6796 6724 instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6797 6725 match(Set dst (AndI (LoadI mem) mask));
6798 6726
6799 6727 ins_cost(125);
6800 6728 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
6801 6729 ins_encode %{
6802 6730 __ movzbl($dst$$Register, $mem$$Address);
6803 6731 %}
6804 6732 ins_pipe(ialu_reg_mem);
6805 6733 %}
6806 6734
6807 6735 // Load Integer (32 bit signed) to Short (16 bit signed)
6808 6736 instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6809 6737 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6810 6738
6811 6739 ins_cost(125);
6812 6740 format %{ "MOVSX $dst, $mem\t# int -> short" %}
6813 6741 ins_encode %{
6814 6742 __ movswl($dst$$Register, $mem$$Address);
6815 6743 %}
6816 6744 ins_pipe(ialu_reg_mem);
6817 6745 %}
6818 6746
6819 6747 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6820 6748 instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6821 6749 match(Set dst (AndI (LoadI mem) mask));
6822 6750
6823 6751 ins_cost(125);
6824 6752 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
6825 6753 ins_encode %{
6826 6754 __ movzwl($dst$$Register, $mem$$Address);
6827 6755 %}
6828 6756 ins_pipe(ialu_reg_mem);
6829 6757 %}
6830 6758
6831 6759 // Load Integer into Long Register
6832 6760 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6833 6761 match(Set dst (ConvI2L (LoadI mem)));
6834 6762 effect(KILL cr);
6835 6763
6836 6764 ins_cost(375);
6837 6765 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
6838 6766 "MOV $dst.hi,$dst.lo\n\t"
6839 6767 "SAR $dst.hi,31" %}
6840 6768
6841 6769 ins_encode %{
6842 6770 __ movl($dst$$Register, $mem$$Address);
6843 6771 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6844 6772 __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
6845 6773 %}
6846 6774
6847 6775 ins_pipe(ialu_reg_mem);
6848 6776 %}
6849 6777
6850 6778 // Load Integer with mask 0xFF into Long Register
6851 6779 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6852 6780 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6853 6781 effect(KILL cr);
6854 6782
6855 6783 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6856 6784 "XOR $dst.hi,$dst.hi" %}
6857 6785 ins_encode %{
6858 6786 Register Rdst = $dst$$Register;
6859 6787 __ movzbl(Rdst, $mem$$Address);
6860 6788 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6861 6789 %}
6862 6790 ins_pipe(ialu_reg_mem);
6863 6791 %}
6864 6792
6865 6793 // Load Integer with mask 0xFFFF into Long Register
6866 6794 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6867 6795 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6868 6796 effect(KILL cr);
6869 6797
6870 6798 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6871 6799 "XOR $dst.hi,$dst.hi" %}
6872 6800 ins_encode %{
6873 6801 Register Rdst = $dst$$Register;
6874 6802 __ movzwl(Rdst, $mem$$Address);
6875 6803 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6876 6804 %}
6877 6805 ins_pipe(ialu_reg_mem);
6878 6806 %}
6879 6807
6880 6808 // Load Integer with 32-bit mask into Long Register
6881 6809 instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6882 6810 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6883 6811 effect(KILL cr);
6884 6812
6885 6813 format %{ "MOV $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6886 6814 "XOR $dst.hi,$dst.hi\n\t"
6887 6815 "AND $dst.lo,$mask" %}
6888 6816 ins_encode %{
6889 6817 Register Rdst = $dst$$Register;
6890 6818 __ movl(Rdst, $mem$$Address);
6891 6819 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6892 6820 __ andl(Rdst, $mask$$constant);
6893 6821 %}
6894 6822 ins_pipe(ialu_reg_mem);
6895 6823 %}
6896 6824
6897 6825 // Load Unsigned Integer into Long Register
6898 6826 instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6899 6827 match(Set dst (LoadUI2L mem));
6900 6828 effect(KILL cr);
6901 6829
6902 6830 ins_cost(250);
6903 6831 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
6904 6832 "XOR $dst.hi,$dst.hi" %}
6905 6833
6906 6834 ins_encode %{
6907 6835 __ movl($dst$$Register, $mem$$Address);
6908 6836 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6909 6837 %}
6910 6838
6911 6839 ins_pipe(ialu_reg_mem);
6912 6840 %}
6913 6841
6914 6842 // Load Long. Cannot clobber address while loading, so restrict address
6915 6843 // register to ESI
// Non-atomic two-instruction load; used only when the LoadL node does not
// require atomic access (see predicate).
6916 6844 instruct loadL(eRegL dst, load_long_memory mem) %{
6917 6845 predicate(!((LoadLNode*)n)->require_atomic_access());
6918 6846 match(Set dst (LoadL mem));
6919 6847
6920 6848 ins_cost(250);
6921 6849 format %{ "MOV $dst.lo,$mem\t# long\n\t"
6922 6850 "MOV $dst.hi,$mem+4" %}
6923 6851
6924 6852 ins_encode %{
6925 6853 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
6926 6854 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
6927 6855 __ movl($dst$$Register, Amemlo);
6928 6856 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6929 6857 %}
6930 6858
6931 6859 ins_pipe(ialu_reg_long_mem);
6932 6860 %}
6933 6861
6934 6862 // Volatile Load Long. Must be atomic, so do 64-bit FILD
6935 6863 // then store it down to the stack and reload on the int
6936 6864 // side.
// x87 path (UseSSE<=1): a single 64-bit FILD/FISTP pair is the only atomic
// 64-bit memory op available without SSE2.
6937 6865 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6938 6866 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6939 6867 match(Set dst (LoadL mem));
6940 6868
6941 6869 ins_cost(200);
6942 6870 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6943 6871 "FISTp $dst" %}
6944 6872 ins_encode(enc_loadL_volatile(mem,dst));
6945 6873 ins_pipe( fpu_reg_mem );
6946 6874 %}
6947 6875
// SSE2 path: MOVSD performs the atomic 64-bit load into an XMM temp.
6948 6876 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6949 6877 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6950 6878 match(Set dst (LoadL mem));
6951 6879 effect(TEMP tmp);
6952 6880 ins_cost(180);
6953 6881 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6954 6882 "MOVSD $dst,$tmp" %}
6955 6883 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
6956 6884 ins_pipe( pipe_slow );
6957 6885 %}
6958 6886
// SSE2 path landing in an integer register pair: MOVD low half, shift the
// XMM temp right 32 bits, MOVD high half.
6959 6887 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6960 6888 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6961 6889 match(Set dst (LoadL mem));
6962 6890 effect(TEMP tmp);
6963 6891 ins_cost(160);
6964 6892 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6965 6893 "MOVD $dst.lo,$tmp\n\t"
6966 6894 "PSRLQ $tmp,32\n\t"
6967 6895 "MOVD $dst.hi,$tmp" %}
6968 6896 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
6969 6897 ins_pipe( pipe_slow );
6970 6898 %}
6971 6899
6972 6900 // Load Range
// Range / pointer / klass loads are all plain 32-bit MOV (opcode 0x8B),
// still using the legacy opcode/ins_encode style rather than ins_encode %{ %}.
6973 6901 instruct loadRange(eRegI dst, memory mem) %{
6974 6902 match(Set dst (LoadRange mem));
6975 6903
6976 6904 ins_cost(125);
6977 6905 format %{ "MOV $dst,$mem" %}
6978 6906 opcode(0x8B);
6979 6907 ins_encode( OpcP, RegMem(dst,mem));
6980 6908 ins_pipe( ialu_reg_mem );
6981 6909 %}
6982 6910
6983 6911
6984 6912 // Load Pointer
6985 6913 instruct loadP(eRegP dst, memory mem) %{
6986 6914 match(Set dst (LoadP mem));
6987 6915
6988 6916 ins_cost(125);
6989 6917 format %{ "MOV $dst,$mem" %}
6990 6918 opcode(0x8B);
6991 6919 ins_encode( OpcP, RegMem(dst,mem));
6992 6920 ins_pipe( ialu_reg_mem );
6993 6921 %}
6994 6922
6995 6923 // Load Klass Pointer
6996 6924 instruct loadKlass(eRegP dst, memory mem) %{
6997 6925 match(Set dst (LoadKlass mem));
6998 6926
6999 6927 ins_cost(125);
7000 6928 format %{ "MOV $dst,$mem" %}
7001 6929 opcode(0x8B);
7002 6930 ins_encode( OpcP, RegMem(dst,mem));
7003 6931 ins_pipe( ialu_reg_mem );
7004 6932 %}
7005 6933
7006 6934 // Load Double
// x87 double load: FLD pushes onto the FPU stack, FSTP pops into dst.
7007 6935 instruct loadD(regD dst, memory mem) %{
7008 6936 predicate(UseSSE<=1);
7009 6937 match(Set dst (LoadD mem));
7010 6938
7011 6939 ins_cost(150);
7012 6940 format %{ "FLD_D ST,$mem\n\t"
7013 6941 "FSTP $dst" %}
7014 6942 opcode(0xDD); /* DD /0 */
7015 6943 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7016 6944 Pop_Reg_D(dst) );
7017 6945 ins_pipe( fpu_reg_mem );
7018 6946 %}
7019 6947
7020 6948 // Load Double to XMM
7021 6949 instruct loadXD(regXD dst, memory mem) %{
7022 6950 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
7023 6951 match(Set dst (LoadD mem));
7024 6952 ins_cost(145);
7025 6953 format %{ "MOVSD $dst,$mem" %}
7026 6954 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7027 6955 ins_pipe( pipe_slow );
7028 6956 %}
7029 6957
// MOVLPD variant preserves the upper XMM half (chosen when
// UseXmmLoadAndClearUpper is off).
7030 6958 instruct loadXD_partial(regXD dst, memory mem) %{
7031 6959 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
7032 6960 match(Set dst (LoadD mem));
7033 6961 ins_cost(145);
7034 6962 format %{ "MOVLPD $dst,$mem" %}
7035 6963 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
7036 6964 ins_pipe( pipe_slow );
7037 6965 %}
7038 6966
7039 6967 // Load to XMM register (single-precision floating point)
7040 6968 // MOVSS instruction
7041 6969 instruct loadX(regX dst, memory mem) %{
7042 6970 predicate(UseSSE>=1);
7043 6971 match(Set dst (LoadF mem));
7044 6972 ins_cost(145);
7045 6973 format %{ "MOVSS $dst,$mem" %}
7046 6974 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
7047 6975 ins_pipe( pipe_slow );
7048 6976 %}
7049 6977
7050 6978 // Load Float
7051 6979 instruct loadF(regF dst, memory mem) %{
7052 6980 predicate(UseSSE==0);
7053 6981 match(Set dst (LoadF mem));
7054 6982
7055 6983 ins_cost(150);
7056 6984 format %{ "FLD_S ST,$mem\n\t"
7057 6985 "FSTP $dst" %}
7058 6986 opcode(0xD9); /* D9 /0 */
7059 6987 ins_encode( OpcP, RMopc_Mem(0x00,mem),
7060 6988 Pop_Reg_F(dst) );
7061 6989 ins_pipe( fpu_reg_mem );
7062 6990 %}
7063 6991
7064 6992 // Load Aligned Packed Byte to XMM register
// The packed (SIMD) loads below all share the 64-bit MOVQ encoding.
7065 6993 instruct loadA8B(regXD dst, memory mem) %{
7066 6994 predicate(UseSSE>=1);
7067 6995 match(Set dst (Load8B mem));
7068 6996 ins_cost(125);
7069 6997 format %{ "MOVQ $dst,$mem\t! packed8B" %}
7070 6998 ins_encode( movq_ld(dst, mem));
7071 6999 ins_pipe( pipe_slow );
7072 7000 %}
7073 7001
7074 7002 // Load Aligned Packed Short to XMM register
7075 7003 instruct loadA4S(regXD dst, memory mem) %{
7076 7004 predicate(UseSSE>=1);
7077 7005 match(Set dst (Load4S mem));
7078 7006 ins_cost(125);
7079 7007 format %{ "MOVQ $dst,$mem\t! packed4S" %}
7080 7008 ins_encode( movq_ld(dst, mem));
7081 7009 ins_pipe( pipe_slow );
7082 7010 %}
7083 7011
7084 7012 // Load Aligned Packed Char to XMM register
7085 7013 instruct loadA4C(regXD dst, memory mem) %{
7086 7014 predicate(UseSSE>=1);
7087 7015 match(Set dst (Load4C mem));
7088 7016 ins_cost(125);
7089 7017 format %{ "MOVQ $dst,$mem\t! packed4C" %}
7090 7018 ins_encode( movq_ld(dst, mem));
7091 7019 ins_pipe( pipe_slow );
7092 7020 %}
7093 7021
7094 7022 // Load Aligned Packed Integer to XMM register
7095 7023 instruct load2IU(regXD dst, memory mem) %{
7096 7024 predicate(UseSSE>=1);
7097 7025 match(Set dst (Load2I mem));
7098 7026 ins_cost(125);
7099 7027 format %{ "MOVQ $dst,$mem\t! packed2I" %}
7100 7028 ins_encode( movq_ld(dst, mem));
7101 7029 ins_pipe( pipe_slow );
7102 7030 %}
7103 7031
7104 7032 // Load Aligned Packed Single to XMM
7105 7033 instruct loadA2F(regXD dst, memory mem) %{
7106 7034 predicate(UseSSE>=1);
7107 7035 match(Set dst (Load2F mem));
7108 7036 ins_cost(145);
7109 7037 format %{ "MOVQ $dst,$mem\t! packed2F" %}
7110 7038 ins_encode( movq_ld(dst, mem));
7111 7039 ins_pipe( pipe_slow );
7112 7040 %}
7113 7041
7114 7042 // Load Effective Address
// One LEA rule per addressing-mode operand (8-bit offset, 32-bit offset,
// index+offset, scaled index, scaled index+offset); bodies are identical,
// only the matched operand differs.
7115 7043 instruct leaP8(eRegP dst, indOffset8 mem) %{
7116 7044 match(Set dst mem);
7117 7045
7118 7046 ins_cost(110);
7119 7047 format %{ "LEA $dst,$mem" %}
7120 7048 opcode(0x8D);
7121 7049 ins_encode( OpcP, RegMem(dst,mem));
7122 7050 ins_pipe( ialu_reg_reg_fat );
7123 7051 %}
7124 7052
7125 7053 instruct leaP32(eRegP dst, indOffset32 mem) %{
7126 7054 match(Set dst mem);
7127 7055
7128 7056 ins_cost(110);
7129 7057 format %{ "LEA $dst,$mem" %}
7130 7058 opcode(0x8D);
7131 7059 ins_encode( OpcP, RegMem(dst,mem));
7132 7060 ins_pipe( ialu_reg_reg_fat );
7133 7061 %}
7134 7062
7135 7063 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
7136 7064 match(Set dst mem);
7137 7065
7138 7066 ins_cost(110);
7139 7067 format %{ "LEA $dst,$mem" %}
7140 7068 opcode(0x8D);
7141 7069 ins_encode( OpcP, RegMem(dst,mem));
7142 7070 ins_pipe( ialu_reg_reg_fat );
7143 7071 %}
7144 7072
7145 7073 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
7146 7074 match(Set dst mem);
7147 7075
7148 7076 ins_cost(110);
7149 7077 format %{ "LEA $dst,$mem" %}
7150 7078 opcode(0x8D);
7151 7079 ins_encode( OpcP, RegMem(dst,mem));
7152 7080 ins_pipe( ialu_reg_reg_fat );
7153 7081 %}
7154 7082
7155 7083 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
7156 7084 match(Set dst mem);
7157 7085
7158 7086 ins_cost(110);
7159 7087 format %{ "LEA $dst,$mem" %}
7160 7088 opcode(0x8D);
7161 7089 ins_encode( OpcP, RegMem(dst,mem));
7162 7090 ins_pipe( ialu_reg_reg_fat );
7163 7091 %}
7164 7092
7165 7093 // Load Constant
7166 7094 instruct loadConI(eRegI dst, immI src) %{
7167 7095 match(Set dst src);
7168 7096
7169 7097 format %{ "MOV $dst,$src" %}
7170 7098 ins_encode( LdImmI(dst, src) );
7171 7099 ins_pipe( ialu_reg_fat );
7172 7100 %}
7173 7101
7174 7102 // Load Constant zero
// XOR reg,reg is the canonical shorter/cheaper zero idiom; it clobbers
// flags, hence KILL cr (the generic loadConI does not).
7175 7103 instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
7176 7104 match(Set dst src);
7177 7105 effect(KILL cr);
7178 7106
7179 7107 ins_cost(50);
7180 7108 format %{ "XOR $dst,$dst" %}
7181 7109 opcode(0x33); /* + rd */
7182 7110 ins_encode( OpcP, RegReg( dst, dst ) );
7183 7111 ins_pipe( ialu_reg );
7184 7112 %}
7185 7113
7186 7114 instruct loadConP(eRegP dst, immP src) %{
7187 7115 match(Set dst src);
7188 7116
7189 7117 format %{ "MOV $dst,$src" %}
7190 7118 opcode(0xB8); /* + rd */
7191 7119 ins_encode( LdImmP(dst, src) );
7192 7120 ins_pipe( ialu_reg_fat );
7193 7121 %}
7194 7122
// 64-bit constant: two immediate MOVs, one per register half.
7195 7123 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
7196 7124 match(Set dst src);
7197 7125 effect(KILL cr);
7198 7126 ins_cost(200);
7199 7127 format %{ "MOV $dst.lo,$src.lo\n\t"
7200 7128 "MOV $dst.hi,$src.hi" %}
7201 7129 opcode(0xB8);
7202 7130 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
7203 7131 ins_pipe( ialu_reg_long_fat );
7204 7132 %}
7205 7133
7206 7134 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
7207 7135 match(Set dst src);
↓ open down ↓ |
2350 lines elided |
↑ open up ↑ |
7208 7136 effect(KILL cr);
7209 7137 ins_cost(150);
7210 7138 format %{ "XOR $dst.lo,$dst.lo\n\t"
7211 7139 "XOR $dst.hi,$dst.hi" %}
7212 7140 opcode(0x33,0x33);
7213 7141 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7214 7142 ins_pipe( ialu_reg_long );
7215 7143 %}
7216 7144
7217 7145 // The instruction usage is guarded by predicate in operand immF().
7218 -instruct loadConF(regF dst, immF src) %{
7219 - match(Set dst src);
7146 +instruct loadConF(regF dst, immF con) %{
7147 + match(Set dst con);
7220 7148 ins_cost(125);
7149 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
7150 + "FSTP $dst" %}
7151 + ins_encode %{
7152 + __ fld_s($constantaddress($con));
7153 + __ fstp_d($dst$$reg);
7154 + %}
7155 + ins_pipe(fpu_reg_con);
7156 +%}
7221 7157
7222 - format %{ "FLD_S ST,$src\n\t"
7158 +// The instruction usage is guarded by predicate in operand immF0().
7159 +instruct loadConF0(regF dst, immF0 con) %{
7160 + match(Set dst con);
7161 + ins_cost(125);
7162 + format %{ "FLDZ ST\n\t"
7223 7163 "FSTP $dst" %}
7224 - opcode(0xD9, 0x00); /* D9 /0 */
7225 - ins_encode(LdImmF(src), Pop_Reg_F(dst) );
7226 - ins_pipe( fpu_reg_con );
7164 + ins_encode %{
7165 + __ fldz();
7166 + __ fstp_d($dst$$reg);
7167 + %}
7168 + ins_pipe(fpu_reg_con);
7169 +%}
7170 +
7171 +// The instruction usage is guarded by predicate in operand immF1().
7172 +instruct loadConF1(regF dst, immF1 con) %{
7173 + match(Set dst con);
7174 + ins_cost(125);
7175 + format %{ "FLD1 ST\n\t"
7176 + "FSTP $dst" %}
7177 + ins_encode %{
7178 + __ fld1();
7179 + __ fstp_d($dst$$reg);
7180 + %}
7181 + ins_pipe(fpu_reg_con);
7227 7182 %}
7228 7183
7229 7184 // The instruction usage is guarded by predicate in operand immXF().
7230 7185 instruct loadConX(regX dst, immXF con) %{
7231 7186 match(Set dst con);
7232 7187 ins_cost(125);
7233 - format %{ "MOVSS $dst,[$con]" %}
7234 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
7235 - ins_pipe( pipe_slow );
7188 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7189 + ins_encode %{
7190 + __ movflt($dst$$XMMRegister, $constantaddress($con));
7191 + %}
7192 + ins_pipe(pipe_slow);
7236 7193 %}
7237 7194
7238 7195 // The instruction usage is guarded by predicate in operand immXF0().
7239 7196 instruct loadConX0(regX dst, immXF0 src) %{
7240 7197 match(Set dst src);
7241 7198 ins_cost(100);
7242 7199 format %{ "XORPS $dst,$dst\t# float 0.0" %}
7243 - ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
7244 - ins_pipe( pipe_slow );
7200 + ins_encode %{
7201 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7202 + %}
7203 + ins_pipe(pipe_slow);
7245 7204 %}
7246 7205
7247 7206 // The instruction usage is guarded by predicate in operand immD().
7248 -instruct loadConD(regD dst, immD src) %{
7249 - match(Set dst src);
7207 +instruct loadConD(regD dst, immD con) %{
7208 + match(Set dst con);
7209 + ins_cost(125);
7210 +
7211 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
7212 + "FSTP $dst" %}
7213 + ins_encode %{
7214 + __ fld_d($constantaddress($con));
7215 + __ fstp_d($dst$$reg);
7216 + %}
7217 + ins_pipe(fpu_reg_con);
7218 +%}
7219 +
7220 +// The instruction usage is guarded by predicate in operand immD0().
7221 +instruct loadConD0(regD dst, immD0 con) %{
7222 + match(Set dst con);
7223 + ins_cost(125);
7224 +
7225 + format %{ "FLDZ ST\n\t"
7226 + "FSTP $dst" %}
7227 + ins_encode %{
7228 + __ fldz();
7229 + __ fstp_d($dst$$reg);
7230 + %}
7231 + ins_pipe(fpu_reg_con);
7232 +%}
7233 +
7234 +// The instruction usage is guarded by predicate in operand immD1().
7235 +instruct loadConD1(regD dst, immD1 con) %{
7236 + match(Set dst con);
7250 7237 ins_cost(125);
7251 7238
7252 - format %{ "FLD_D ST,$src\n\t"
7239 + format %{ "FLD1 ST\n\t"
7253 7240 "FSTP $dst" %}
7254 - ins_encode(LdImmD(src), Pop_Reg_D(dst) );
7255 - ins_pipe( fpu_reg_con );
7241 + ins_encode %{
7242 + __ fld1();
7243 + __ fstp_d($dst$$reg);
7244 + %}
7245 + ins_pipe(fpu_reg_con);
7256 7246 %}
7257 7247
// The instruction usage is guarded by predicate in operand immXD().
// SSE2 path: load the double constant from the nmethod's constant table
// with a single MOVSD into the XMM destination.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7266 7258
// The instruction usage is guarded by predicate in operand immXD0().
// Materialize double 0.0 by XOR-ing the XMM register with itself.
// Converted from the raw opcode-byte encoding (Opcode 0x66,0x0F,0x57)
// to MacroAssembler style for consistency with loadConX0/loadConXD in
// this change; __ xorpd emits the identical 66 0F 57 /r byte sequence.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7275 7267
// Load Stack Slot
// Integer from a stack slot into a general register (0x8B = MOV r32,r/m32).
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Long from a stack slot: two 32-bit MOVs for the low and high halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Pointer from a stack slot; same 0x8B MOV encoding as the int case.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float from a stack slot via the x87 stack: FLD m32real then FSTP to dst.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Double from a stack slot via the x87 stack: FLD m64real then FSTP to dst.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7334 7326
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
// Variant selection is driven by UseSSE / 3DNow! support and the
// ReadPrefetchInstr / AllocatePrefetchInstr flags (see each predicate).

// No SSE and no 3DNow!: read prefetch degenerates to nothing (size 0).
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCH (0F 0D /0), or forced by ReadPrefetchInstr==3.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);         /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0): non-temporal read prefetch.
instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1): prefetch into L1+L2 for read.
instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3): prefetch into L2 for read.
instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// No SSE and no 3DNow!: write prefetch degenerates to nothing (size 0).
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCHW (0F 0D /1), or forced by AllocatePrefetchInstr==3.
instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);         /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0): non-temporal prefetch for write/allocation.
instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1): prefetch into L1+L2 for write.
instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3): prefetch into L2 for write.
instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);         /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7445 7437
//----------Store Instructions-------------------------------------------------

// Store Byte (0x88 = MOV r/m8,r8)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 0x89 = 16-bit MOV)
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer (0x89 = MOV r/m32,r32)
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long: two 32-bit MOVs (non-atomic; atomic case handled below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low 32 bits are stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7504 7496
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The leading CMP (0x3B) against the memory operand triggers the
// implicit null check before the FP sequence commits the store.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp (single 64-bit MOVSD).
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the source already in GP registers: assemble the
// 64-bit value in an XMM temp (MOVD lo, MOVD hi, PUNPCKLDQ), then MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}
7549 7541
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate (C7 /0 = MOV r/m32,imm32)
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate; gated on UseStoreImmI16 because 16-bit
// immediate stores are slow on some CPUs.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate (C6 /0 = MOV r/m8,imm8)
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7606 7598
// Store Aligned Packed Byte XMM register to memory (64-bit MOVQ)
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory (64-bit MOVQ)
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory (64-bit MOVQ)
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate (byte store, C6 /0)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7647 7639
// Store Double (x87: FST m64real from the FP top-of-stack)
instruct storeD( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: the 64-bit memory store itself
// rounds the 80-bit x87 value, so RoundDouble folds into the store.
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction (F2 0F 11)
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}
7682 7674
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction (F3 0F 11)
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory (64-bit MOVQ)
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7703 7695
// Store Float (x87: FST m32real from the FP top-of-stack)
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: the 32-bit memory store rounds,
// so RoundFloat folds into the store.
instruct storeF_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: ConvD2F folds into the 32-bit store,
// which narrows the double to float as a side effect of the store.
instruct storeF_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immF().
// Stores the raw float bit pattern with an integer MOV (C7 /0).
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
7763 7755
// Store Integer to stack slot (0x89 = MOV r/m32,r32)
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot; same encoding as the int case.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit MOVs for the low and high halves.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
7797 7789
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86 only StoreLoad needs a real instruction;
// acquire/release orderings are satisfied by the hardware memory model,
// so those barriers emit nothing (size 0).

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire paired with a preceding FastLock: the CMPXCHG in the lock
// sequence already provides the ordering, so this is empty.
instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release immediately followed by a FastUnlock: the unlock provides
// the ordering, so this is empty.
instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; on MP emitted as LOCK ADDL [ESP], 0
// (see MacroAssembler::membar), hence the KILL of the flags.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A StoreLoad barrier proven redundant by a preceding locked instruction.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
7872 7864
//----------Move Instructions--------------------------------------------------
// CastX2P: machine-word to pointer.  Both operands are pinned to EAX,
// so no code is needed — it is a pure re-typing.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// CastP2X: pointer to machine word — a plain register copy.
instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
7889 7881
//----------Conditional Move---------------------------------------------------
// Conditional move (CMOVcc = 0F 40+cc, P6 and later only)
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare flavor of the register CMOV.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare flags) flavor expands to the unsigned version.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (folds the LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move, unsigned compare, memory source.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7951 7943
// Conditional move of a pointer (CMOVcc, P6 and later).
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here: branch around a plain MOV (0x8B).
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move, unsigned compare.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
8024 8016
// Conditional move of an x87 double using FCMOVcc (0xDA family);
// requires unsigned/unordered flags — see the signed workaround below.
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move of an x87 float; same FCMOVcc encoding as the double.
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned:
// emulate with an inverted branch around a register-to-register FP move.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSS instead.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSD instead.
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}
8108 8100
8109 8101 // unsigned version
// Conditional move of a float in an SSE register on an unsigned comparison:
// branch (sense inverted) around a MOVSS.
8110 8102 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
8111 8103 predicate (UseSSE>=1);
8112 8104 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8113 8105 ins_cost(200);
8114 8106 format %{ "Jn$cop skip\n\t"
8115 8107 "MOVSS $dst,$src\t# float\n"
8116 8108 "skip:" %}
8117 8109 ins_encode %{
8118 8110 Label skip;
8119 8111 // Invert sense of branch from sense of CMOV
8120 8112 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8121 8113 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8122 8114 __ bind(skip);
8123 8115 %}
8124 8116 ins_pipe( pipe_slow );
8125 8117 %}
8126 8118
// UCF (unordered-comparison flags) variant: delegates to the unsigned form.
8127 8119 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
8128 8120 predicate (UseSSE>=1);
8129 8121 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8130 8122 ins_cost(200);
8131 8123 expand %{
8132 8124 fcmovX_regU(cop, cr, dst, src);
8133 8125 %}
8134 8126 %}
8135 8127
8136 8128 // unsigned version
// Conditional move of a double in an SSE2 register on an unsigned
// comparison: branch (sense inverted) around a MOVSD.
// Fix: format string said "# float" but this is a double move (regXD
// operands, movdbl/MOVSD) — corrected to "# double" so disassembly output
// is not misleading.
8137 8129 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
8138 8130 predicate (UseSSE>=2);
8139 8131 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8140 8132 ins_cost(200);
8141 8133 format %{ "Jn$cop skip\n\t"
8142 8134 "MOVSD $dst,$src\t# double\n"
8143 8135 "skip:" %}
8144 8136 ins_encode %{
8145 8137 Label skip;
8146 8138 // Invert sense of branch from sense of CMOV
8147 8139 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8148 8140 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8149 8141 __ bind(skip);
8150 8142 %}
8151 8143 ins_pipe( pipe_slow );
8152 8144 %}
8153 8145
// UCF (unordered-comparison flags) variant of the SSE2 double conditional
// move: delegates to the unsigned form.
8154 8146 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8155 8147 predicate (UseSSE>=2);
8156 8148 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8157 8149 ins_cost(200);
8158 8150 expand %{
8159 8151 fcmovXD_regU(cop, cr, dst, src);
8160 8152 %}
8161 8153 %}
8162 8154
// Conditional move of a long: on 32-bit x86 a long lives in a register
// pair, so two CMOVcc instructions are emitted (low then high word).
// Requires hardware CMOV support.
8163 8155 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8164 8156 predicate(VM_Version::supports_cmov() );
8165 8157 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8166 8158 ins_cost(200);
8167 8159 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8168 8160 "CMOV$cop $dst.hi,$src.hi" %}
8169 8161 opcode(0x0F,0x40);
8170 8162 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8171 8163 ins_pipe( pipe_cmov_reg_long );
8172 8164 %}
8173 8165
// Unsigned-comparison variant of the long conditional move.
8174 8166 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8175 8167 predicate(VM_Version::supports_cmov() );
8176 8168 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8177 8169 ins_cost(200);
8178 8170 format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8179 8171 "CMOV$cop $dst.hi,$src.hi" %}
8180 8172 opcode(0x0F,0x40);
8181 8173 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8182 8174 ins_pipe( pipe_cmov_reg_long );
8183 8175 %}
8184 8176
// UCF variant: delegates to the unsigned long conditional move.
8185 8177 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8186 8178 predicate(VM_Version::supports_cmov() );
8187 8179 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8188 8180 ins_cost(200);
8189 8181 expand %{
8190 8182 cmovL_regU(cop, cr, dst, src);
8191 8183 %}
8192 8184 %}
8193 8185
8194 8186 //----------Arithmetic Instructions--------------------------------------------
8195 8187 //----------Addition Instructions----------------------------------------------
8196 8188 // Integer Addition Instructions
// Register-register integer add (ADD r32, r/m32). Clobbers EFLAGS.
8197 8189 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8198 8190 match(Set dst (AddI dst src));
8199 8191 effect(KILL cr);
8200 8192
8201 8193 size(2);
8202 8194 format %{ "ADD $dst,$src" %}
8203 8195 opcode(0x03);
8204 8196 ins_encode( OpcP, RegReg( dst, src) );
8205 8197 ins_pipe( ialu_reg_reg );
8206 8198 %}
8207 8199
// Register-immediate integer add; OpcSErm/Con8or32 pick the sign-extended
// 8-bit form when the immediate fits in a byte.
8208 8200 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8209 8201 match(Set dst (AddI dst src));
8210 8202 effect(KILL cr);
8211 8203
8212 8204 format %{ "ADD $dst,$src" %}
8213 8205 opcode(0x81, 0x00); /* /0 id */
8214 8206 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215 8207 ins_pipe( ialu_reg );
8216 8208 %}
8217 8209
// Add of constant 1 as a one-byte INC (opcode 0x40+reg), gated by UseIncDec.
8218 8210 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8219 8211 predicate(UseIncDec);
8220 8212 match(Set dst (AddI dst src));
8221 8213 effect(KILL cr);
8222 8214
8223 8215 size(1);
8224 8216 format %{ "INC $dst" %}
8225 8217 opcode(0x40); /* */
8226 8218 ins_encode( Opc_plus( primary, dst ) );
8227 8219 ins_pipe( ialu_reg );
8228 8220 %}
8229 8221
// Three-operand add via LEA: does not clobber flags (no KILL cr).
8230 8222 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8231 8223 match(Set dst (AddI src0 src1));
8232 8224 ins_cost(110);
8233 8225
8234 8226 format %{ "LEA $dst,[$src0 + $src1]" %}
8235 8227 opcode(0x8D); /* 0x8D /r */
8236 8228 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8237 8229 ins_pipe( ialu_reg_reg );
8238 8230 %}
8239 8231
// Pointer + immediate via LEA, also flags-preserving.
8240 8232 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8241 8233 match(Set dst (AddP src0 src1));
8242 8234 ins_cost(110);
8243 8235
8244 8236 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
8245 8237 opcode(0x8D); /* 0x8D /r */
8246 8238 ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8247 8239 ins_pipe( ialu_reg_reg );
8248 8240 %}
8249 8241
// Add of constant -1 as a one-byte DEC (opcode 0x48+reg), gated by UseIncDec.
8250 8242 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8251 8243 predicate(UseIncDec);
8252 8244 match(Set dst (AddI dst src));
8253 8245 effect(KILL cr);
8254 8246
8255 8247 size(1);
8256 8248 format %{ "DEC $dst" %}
8257 8249 opcode(0x48); /* */
8258 8250 ins_encode( Opc_plus( primary, dst ) );
8259 8251 ins_pipe( ialu_reg );
8260 8252 %}
8261 8253
// Pointer addition instructions: plain integer ADD is used since pointers
// are 32-bit on this platform. Clobbers EFLAGS.
8262 8254 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8263 8255 match(Set dst (AddP dst src));
8264 8256 effect(KILL cr);
8265 8257
8266 8258 size(2);
8267 8259 format %{ "ADD $dst,$src" %}
8268 8260 opcode(0x03);
8269 8261 ins_encode( OpcP, RegReg( dst, src) );
8270 8262 ins_pipe( ialu_reg_reg );
8271 8263 %}
8272 8264
// Pointer + immediate; short sign-extended immediate form used when possible.
8273 8265 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8274 8266 match(Set dst (AddP dst src));
8275 8267 effect(KILL cr);
8276 8268
8277 8269 format %{ "ADD $dst,$src" %}
8278 8270 opcode(0x81,0x00); /* Opcode 81 /0 id */
8279 8271 // ins_encode( RegImm( dst, src) );
8280 8272 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8281 8273 ins_pipe( ialu_reg );
8282 8274 %}
8283 8275
// Register += memory (ADD r32, m32).
8284 8276 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8285 8277 match(Set dst (AddI dst (LoadI src)));
8286 8278 effect(KILL cr);
8287 8279
8288 8280 ins_cost(125);
8289 8281 format %{ "ADD $dst,$src" %}
8290 8282 opcode(0x03);
8291 8283 ins_encode( OpcP, RegMem( dst, src) );
8292 8284 ins_pipe( ialu_reg_mem );
8293 8285 %}
8294 8286
// Memory += register: matches the load-add-store ideal subtree as a single
// read-modify-write ADD m32, r32.
8295 8287 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8296 8288 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8297 8289 effect(KILL cr);
8298 8290
8299 8291 ins_cost(150);
8300 8292 format %{ "ADD $dst,$src" %}
8301 8293 opcode(0x01); /* Opcode 01 /r */
8302 8294 ins_encode( OpcP, RegMem( src, dst ) );
8303 8295 ins_pipe( ialu_mem_reg );
8304 8296 %}
8305 8297
8306 8298 // Add Memory with Immediate
8307 8299 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8308 8300 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8309 8301 effect(KILL cr);
8310 8302
8311 8303 ins_cost(125);
8312 8304 format %{ "ADD $dst,$src" %}
8313 8305 opcode(0x81); /* Opcode 81 /0 id */
8314 8306 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8315 8307 ins_pipe( ialu_mem_imm );
8316 8308 %}
8317 8309
// Memory += 1 encoded as INC m32 (FF /0).
8318 8310 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8319 8311 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8320 8312 effect(KILL cr);
8321 8313
8322 8314 ins_cost(125);
8323 8315 format %{ "INC $dst" %}
8324 8316 opcode(0xFF); /* Opcode FF /0 */
8325 8317 ins_encode( OpcP, RMopc_Mem(0x00,dst));
8326 8318 ins_pipe( ialu_mem_imm );
8327 8319 %}
8328 8320
// Memory += -1 encoded as DEC m32 (FF /1).
8329 8321 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8330 8322 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8331 8323 effect(KILL cr);
8332 8324
8333 8325 ins_cost(125);
8334 8326 format %{ "DEC $dst" %}
8335 8327 opcode(0xFF); /* Opcode FF /1 */
8336 8328 ins_encode( OpcP, RMopc_Mem(0x01,dst));
8337 8329 ins_pipe( ialu_mem_imm );
8338 8330 %}
8339 8331
8340 8332
// Compiler-only type casts: these emit no machine code (size 0 / empty
// encoding); they exist purely to carry type information in the ideal graph.
8341 8333 instruct checkCastPP( eRegP dst ) %{
8342 8334 match(Set dst (CheckCastPP dst));
8343 8335
8344 8336 size(0);
8345 8337 format %{ "#checkcastPP of $dst" %}
8346 8338 ins_encode( /*empty encoding*/ );
8347 8339 ins_pipe( empty );
8348 8340 %}
8349 8341
8350 8342 instruct castPP( eRegP dst ) %{
8351 8343 match(Set dst (CastPP dst));
8352 8344 format %{ "#castPP of $dst" %}
8353 8345 ins_encode( /*empty encoding*/ );
8354 8346 ins_pipe( empty );
8355 8347 %}
8356 8348
8357 8349 instruct castII( eRegI dst ) %{
8358 8350 match(Set dst (CastII dst));
8359 8351 format %{ "#castII of $dst" %}
8360 8352 ins_encode( /*empty encoding*/ );
8361 8353 ins_cost(0);
8362 8354 ins_pipe( empty );
8363 8355 %}
8364 8356
8365 8357
8366 8358 // Load-locked - same as a regular pointer load when used with compare-swap
8367 8359 instruct loadPLocked(eRegP dst, memory mem) %{
8368 8360 match(Set dst (LoadPLocked mem));
8369 8361
8370 8362 ins_cost(125);
8371 8363 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
8372 8364 opcode(0x8B);
8373 8365 ins_encode( OpcP, RegMem(dst,mem));
8374 8366 ins_pipe( ialu_reg_mem );
8375 8367 %}
8376 8368
8377 8369 // LoadLong-locked - same as a volatile long load when used with compare-swap
// x87 path (UseSSE<=1): FILD/FISTP gives a single atomic 64-bit transfer.
8378 8370 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8379 8371 predicate(UseSSE<=1);
8380 8372 match(Set dst (LoadLLocked mem));
8381 8373
8382 8374 ins_cost(200);
8383 8375 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
8384 8376 "FISTp $dst" %}
8385 8377 ins_encode(enc_loadL_volatile(mem,dst));
8386 8378 ins_pipe( fpu_reg_mem );
8387 8379 %}
8388 8380
// SSE2 path to a stack slot: one atomic MOVSD load, then store to the slot.
8389 8381 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8390 8382 predicate(UseSSE>=2);
8391 8383 match(Set dst (LoadLLocked mem));
8392 8384 effect(TEMP tmp);
8393 8385 ins_cost(180);
8394 8386 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8395 8387 "MOVSD $dst,$tmp" %}
8396 8388 ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8397 8389 ins_pipe( pipe_slow );
8398 8390 %}
8399 8391
// SSE2 path to a GPR pair: atomic MOVSD, then split the XMM value into
// dst.lo / dst.hi with MOVD + PSRLQ.
8400 8392 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8401 8393 predicate(UseSSE>=2);
8402 8394 match(Set dst (LoadLLocked mem));
8403 8395 effect(TEMP tmp);
8404 8396 ins_cost(160);
8405 8397 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8406 8398 "MOVD $dst.lo,$tmp\n\t"
8407 8399 "PSRLQ $tmp,32\n\t"
8408 8400 "MOVD $dst.hi,$tmp" %}
8409 8401 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8410 8402 ins_pipe( pipe_slow );
8411 8403 %}
8412 8404
8413 8405 // Conditional-store of the updated heap-top.
8414 8406 // Used during allocation of the shared heap.
8415 8407 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
8416 8408 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
8417 8409 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8418 8410 // EAX is killed if there is contention, but then it's also unused.
8419 8411 // In the common case of no contention, EAX holds the new oop address.
8420 8412 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
8421 8413 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
8422 8414 ins_pipe( pipe_cmpxchg );
8423 8415 %}
8424 8416
8425 8417 // Conditional-store of an int value.
8426 8418 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8427 8419 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8428 8420 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8429 8421 effect(KILL oldval);
8430 8422 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8431 8423 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8432 8424 ins_pipe( pipe_cmpxchg );
8433 8425 %}
8434 8426
8435 8427 // Conditional-store of a long value.
8436 8428 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
8437 8429 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8438 8430 match(Set cr (StoreLConditional mem (Binary oldval newval)));
8439 8431 effect(KILL oldval);
8440 8432 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8441 8433 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8442 8434 "XCHG EBX,ECX"
8443 8435 %}
8444 8436 ins_encode %{
8445 8437 // Note: we need to swap rbx, and rcx before and after the
8446 8438 // cmpxchg8 instruction because the instruction uses
8447 8439 // rcx as the high order word of the new value to store but
8448 8440 // our register encoding uses rbx.
8449 8441 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8450 8442 if( os::is_MP() )
8451 8443 __ lock();
8452 8444 __ cmpxchg8($mem$$Address);
8453 8445 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8454 8446 %}
8455 8447 ins_pipe( pipe_cmpxchg );
8456 8448 %}
8457 8449
8458 8450 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8459 8451
// 64-bit CAS via LOCK CMPXCHG8B: expected value in EDX:EAX, new value in
// EBX:ECX; res gets 0/1 derived from ZF.
8460 8452 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8461 8453 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8462 8454 effect(KILL cr, KILL oldval);
8463 8455 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8464 8456 "MOV $res,0\n\t"
8465 8457 "JNE,s fail\n\t"
8466 8458 "MOV $res,1\n"
8467 8459 "fail:" %}
8468 8460 ins_encode( enc_cmpxchg8(mem_ptr),
8469 8461 enc_flags_ne_to_boolean(res) );
8470 8462 ins_pipe( pipe_cmpxchg );
8471 8463 %}
8472 8464
// Pointer CAS via LOCK CMPXCHG: expected value in EAX, new value in ECX.
8473 8465 instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
8474 8466 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8475 8467 effect(KILL cr, KILL oldval);
8476 8468 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8477 8469 "MOV $res,0\n\t"
8478 8470 "JNE,s fail\n\t"
8479 8471 "MOV $res,1\n"
8480 8472 "fail:" %}
8481 8473 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8482 8474 ins_pipe( pipe_cmpxchg );
8483 8475 %}
8484 8476
// Int CAS via LOCK CMPXCHG: same shape as the pointer form.
8485 8477 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
8486 8478 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8487 8479 effect(KILL cr, KILL oldval);
8488 8480 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8489 8481 "MOV $res,0\n\t"
8490 8482 "JNE,s fail\n\t"
8491 8483 "MOV $res,1\n"
8492 8484 "fail:" %}
8493 8485 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8494 8486 ins_pipe( pipe_cmpxchg );
8495 8487 %}
8496 8488
8497 8489 //----------Subtraction Instructions-------------------------------------------
8498 8490 // Integer Subtraction Instructions
// Register-register subtract (SUB r32, r/m32). Clobbers EFLAGS.
8499 8491 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8500 8492 match(Set dst (SubI dst src));
8501 8493 effect(KILL cr);
8502 8494
8503 8495 size(2);
8504 8496 format %{ "SUB $dst,$src" %}
8505 8497 opcode(0x2B);
8506 8498 ins_encode( OpcP, RegReg( dst, src) );
8507 8499 ins_pipe( ialu_reg_reg );
8508 8500 %}
8509 8501
// Register-immediate subtract (81 /5); short sign-extended form when possible.
8510 8502 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8511 8503 match(Set dst (SubI dst src));
8512 8504 effect(KILL cr);
8513 8505
8514 8506 format %{ "SUB $dst,$src" %}
8515 8507 opcode(0x81,0x05); /* Opcode 81 /5 */
8516 8508 // ins_encode( RegImm( dst, src) );
8517 8509 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8518 8510 ins_pipe( ialu_reg );
8519 8511 %}
8520 8512
// Register -= memory.
8521 8513 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8522 8514 match(Set dst (SubI dst (LoadI src)));
8523 8515 effect(KILL cr);
8524 8516
8525 8517 ins_cost(125);
8526 8518 format %{ "SUB $dst,$src" %}
8527 8519 opcode(0x2B);
8528 8520 ins_encode( OpcP, RegMem( dst, src) );
8529 8521 ins_pipe( ialu_reg_mem );
8530 8522 %}
8531 8523
// Memory -= register: load-sub-store subtree as one RMW SUB m32, r32.
8532 8524 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8533 8525 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8534 8526 effect(KILL cr);
8535 8527
8536 8528 ins_cost(150);
8537 8529 format %{ "SUB $dst,$src" %}
8538 8530 opcode(0x29); /* Opcode 29 /r */
8539 8531 ins_encode( OpcP, RegMem( src, dst ) );
8540 8532 ins_pipe( ialu_mem_reg );
8541 8533 %}
8542 8534
8543 8535 // Subtract from a pointer
// Matches ptr + (0 - src) and emits a plain SUB.
8544 8536 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8545 8537 match(Set dst (AddP dst (SubI zero src)));
8546 8538 effect(KILL cr);
8547 8539
8548 8540 size(2);
8549 8541 format %{ "SUB $dst,$src" %}
8550 8542 opcode(0x2B);
8551 8543 ins_encode( OpcP, RegReg( dst, src) );
8552 8544 ins_pipe( ialu_reg_reg );
8553 8545 %}
8554 8546
// Negation: 0 - dst emitted as NEG r32 (F7 /3).
8555 8547 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
8556 8548 match(Set dst (SubI zero dst));
8557 8549 effect(KILL cr);
8558 8550
8559 8551 size(2);
8560 8552 format %{ "NEG $dst" %}
8561 8553 opcode(0xF7,0x03); // Opcode F7 /3
8562 8554 ins_encode( OpcP, RegOpc( dst ) );
8563 8555 ins_pipe( ialu_reg );
8564 8556 %}
8565 8557
8566 8558
8567 8559 //----------Multiplication/Division Instructions-------------------------------
8568 8560 // Integer Multiplication Instructions
8569 8561 // Multiply Register
// Two-operand signed multiply: IMUL r32, r/m32 (0F AF).
8570 8562 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8571 8563 match(Set dst (MulI dst src));
8572 8564 effect(KILL cr);
8573 8565
8574 8566 size(3);
8575 8567 ins_cost(300);
8576 8568 format %{ "IMUL $dst,$src" %}
8577 8569 opcode(0xAF, 0x0F);
8578 8570 ins_encode( OpcS, OpcP, RegReg( dst, src) );
8579 8571 ins_pipe( ialu_reg_reg_alu0 );
8580 8572 %}
8581 8573
8582 8574 // Multiply 32-bit Immediate
// Three-operand form IMUL r32, r/m32, imm (69 /r id) — dst may differ from src.
8583 8575 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
8584 8576 match(Set dst (MulI src imm));
8585 8577 effect(KILL cr);
8586 8578
8587 8579 ins_cost(300);
8588 8580 format %{ "IMUL $dst,$src,$imm" %}
8589 8581 opcode(0x69); /* 69 /r id */
8590 8582 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8591 8583 ins_pipe( ialu_reg_reg_alu0 );
8592 8584 %}
8593 8585
// Load a 32-bit-representable long constant into EAX only (low word);
// used as the constant operand of the mulI_imm_high patterns below.
8594 8586 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
8595 8587 match(Set dst src);
8596 8588 effect(KILL cr);
8597 8589
8598 8590 // Note that this is artificially increased to make it more expensive than loadConL
8599 8591 ins_cost(250);
8600 8592 format %{ "MOV EAX,$src\t// low word only" %}
8601 8593 opcode(0xB8);
8602 8594 ins_encode( LdImmL_Lo(dst, src) );
8603 8595 ins_pipe( ialu_reg_fat );
8604 8596 %}
8605 8597
8606 8598 // Multiply by 32-bit Immediate, taking the shifted high order results
8607 8599 // (special case for shift by 32)
// The predicate walks the matched subtree to require that the long constant
// operand actually fits in 32 bits.
8608 8600 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8609 8601 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8610 8602 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8611 8603 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8612 8604 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8613 8605 effect(USE src1, KILL cr);
8614 8606
8615 8607 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8616 8608 ins_cost(0*100 + 1*400 - 150);
8617 8609 format %{ "IMUL EDX:EAX,$src1" %}
8618 8610 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8619 8611 ins_pipe( pipe_slow );
8620 8612 %}
8621 8613
8622 8614 // Multiply by 32-bit Immediate, taking the shifted high order results
// General shift-count variant (32..63): IMUL then SAR of EDX by (cnt-32).
8623 8615 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8624 8616 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8625 8617 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8626 8618 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8627 8619 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8628 8620 effect(USE src1, KILL cr);
8629 8621
8630 8622 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8631 8623 ins_cost(1*100 + 1*400 - 150);
8632 8624 format %{ "IMUL EDX:EAX,$src1\n\t"
8633 8625 "SAR EDX,$cnt-32" %}
8634 8626 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8635 8627 ins_pipe( pipe_slow );
8636 8628 %}
8637 8629
8638 8630 // Multiply Memory 32-bit Immediate
// IMUL r32, m32, imm (69 /r id).
8639 8631 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8640 8632 match(Set dst (MulI (LoadI src) imm));
8641 8633 effect(KILL cr);
8642 8634
8643 8635 ins_cost(300);
8644 8636 format %{ "IMUL $dst,$src,$imm" %}
8645 8637 opcode(0x69); /* 69 /r id */
8646 8638 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8647 8639 ins_pipe( ialu_reg_mem_alu0 );
8648 8640 %}
8649 8641
8650 8642 // Multiply Memory
// Register *= memory: IMUL r32, m32 (0F AF).
8651 8643 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8652 8644 match(Set dst (MulI dst (LoadI src)));
8653 8645 effect(KILL cr);
8654 8646
8655 8647 ins_cost(350);
8656 8648 format %{ "IMUL $dst,$src" %}
8657 8649 opcode(0xAF, 0x0F);
8658 8650 ins_encode( OpcS, OpcP, RegMem( dst, src) );
8659 8651 ins_pipe( ialu_reg_mem_alu0 );
8660 8652 %}
8661 8653
8662 8654 // Multiply Register Int to Long
// Widening signed multiply: one-operand IMUL producing EDX:EAX.
8663 8655 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8664 8656 // Basic Idea: long = (long)int * (long)int
8665 8657 match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8666 8658 effect(DEF dst, USE src, USE src1, KILL flags);
8667 8659
8668 8660 ins_cost(300);
8669 8661 format %{ "IMUL $dst,$src1" %}
8670 8662
8671 8663 ins_encode( long_int_multiply( dst, src1 ) );
8672 8664 ins_pipe( ialu_reg_reg_alu0 );
8673 8665 %}
8674 8666
// Widening unsigned multiply of zero-extended ints: one-operand MUL
// producing EDX:EAX.
8675 8667 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8676 8668 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
8677 8669 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8678 8670 effect(KILL flags);
8679 8671
8680 8672 ins_cost(300);
8681 8673 format %{ "MUL $dst,$src1" %}
8682 8674
8683 8675 ins_encode( long_uint_multiply(dst, src1) );
8684 8676 ins_pipe( ialu_reg_reg_alu0 );
8685 8677 %}
8686 8678
8687 8679 // Multiply Register Long
// Full 64x64->64 multiply on a 32-bit machine: three 32-bit multiplies
// plus adds to form the high word.
8688 8680 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8689 8681 match(Set dst (MulL dst src));
8690 8682 effect(KILL cr, TEMP tmp);
8691 8683 ins_cost(4*100+3*400);
8692 8684 // Basic idea: lo(result) = lo(x_lo * y_lo)
8693 8685 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8694 8686 format %{ "MOV $tmp,$src.lo\n\t"
8695 8687 "IMUL $tmp,EDX\n\t"
8696 8688 "MOV EDX,$src.hi\n\t"
8697 8689 "IMUL EDX,EAX\n\t"
8698 8690 "ADD $tmp,EDX\n\t"
8699 8691 "MUL EDX:EAX,$src.lo\n\t"
8700 8692 "ADD EDX,$tmp" %}
8701 8693 ins_encode( long_multiply( dst, src, tmp ) );
8702 8694 ins_pipe( pipe_slow );
8703 8695 %}
8704 8696
8705 8697 // Multiply Register Long where the left operand's high 32 bits are zero
// Cheaper form: the x_hi*y_lo cross term is known to be zero.
8706 8698 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8707 8699 predicate(is_operand_hi32_zero(n->in(1)));
8708 8700 match(Set dst (MulL dst src));
8709 8701 effect(KILL cr, TEMP tmp);
8710 8702 ins_cost(2*100+2*400);
8711 8703 // Basic idea: lo(result) = lo(x_lo * y_lo)
8712 8704 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8713 8705 format %{ "MOV $tmp,$src.hi\n\t"
8714 8706 "IMUL $tmp,EAX\n\t"
8715 8707 "MUL EDX:EAX,$src.lo\n\t"
8716 8708 "ADD EDX,$tmp" %}
8717 8709 ins_encode %{
8718 8710 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8719 8711 __ imull($tmp$$Register, rax);
8720 8712 __ mull($src$$Register);
8721 8713 __ addl(rdx, $tmp$$Register);
8722 8714 %}
8723 8715 ins_pipe( pipe_slow );
8724 8716 %}
8725 8717
8726 8718 // Multiply Register Long where the right operand's high 32 bits are zero
// Mirror of the lhi0 form: the x_lo*y_hi cross term is zero.
8727 8719 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8728 8720 predicate(is_operand_hi32_zero(n->in(2)));
8729 8721 match(Set dst (MulL dst src));
8730 8722 effect(KILL cr, TEMP tmp);
8731 8723 ins_cost(2*100+2*400);
8732 8724 // Basic idea: lo(result) = lo(x_lo * y_lo)
8733 8725 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8734 8726 format %{ "MOV $tmp,$src.lo\n\t"
8735 8727 "IMUL $tmp,EDX\n\t"
8736 8728 "MUL EDX:EAX,$src.lo\n\t"
8737 8729 "ADD EDX,$tmp" %}
8738 8730 ins_encode %{
8739 8731 __ movl($tmp$$Register, $src$$Register);
8740 8732 __ imull($tmp$$Register, rdx);
8741 8733 __ mull($src$$Register);
8742 8734 __ addl(rdx, $tmp$$Register);
8743 8735 %}
8744 8736 ins_pipe( pipe_slow );
8745 8737 %}
8746 8738
8747 8739 // Multiply Register Long where the left and the right operands' high 32 bits are zero
// Cheapest form: single unsigned MUL suffices, no cross terms.
8748 8740 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
8749 8741 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8750 8742 match(Set dst (MulL dst src));
8751 8743 effect(KILL cr);
8752 8744 ins_cost(1*400);
8753 8745 // Basic idea: lo(result) = lo(x_lo * y_lo)
8754 8746 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8755 8747 format %{ "MUL EDX:EAX,$src.lo\n\t" %}
8756 8748 ins_encode %{
8757 8749 __ mull($src$$Register);
8758 8750 %}
8759 8751 ins_pipe( pipe_slow );
8760 8752 %}
8761 8753
8762 8754 // Multiply Register Long by small constant
// Constant fits in 8 bits (immL_127): IMUL-by-immediate for the high part.
8763 8755 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
8764 8756 match(Set dst (MulL dst src));
8765 8757 effect(KILL cr, TEMP tmp);
8766 8758 ins_cost(2*100+2*400);
8767 8759 size(12);
8768 8760 // Basic idea: lo(result) = lo(src * EAX)
8769 8761 // hi(result) = hi(src * EAX) + lo(src * EDX)
8770 8762 format %{ "IMUL $tmp,EDX,$src\n\t"
8771 8763 "MOV EDX,$src\n\t"
8772 8764 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
8773 8765 "ADD EDX,$tmp" %}
8774 8766 ins_encode( long_multiply_con( dst, src, tmp ) );
8775 8767 ins_pipe( pipe_slow );
8776 8768 %}
8777 8769
8778 8770 // Integer DIV with Register
// Signed divide with explicit guard for the min_jint / -1 overflow case
// (IDIV would fault); that case yields quotient min_jint, remainder 0.
8779 8771 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8780 8772 match(Set rax (DivI rax div));
8781 8773 effect(KILL rdx, KILL cr);
8782 8774 size(26);
8783 8775 ins_cost(30*100+10*100);
8784 8776 format %{ "CMP EAX,0x80000000\n\t"
8785 8777 "JNE,s normal\n\t"
8786 8778 "XOR EDX,EDX\n\t"
8787 8779 "CMP ECX,-1\n\t"
8788 8780 "JE,s done\n"
8789 8781 "normal: CDQ\n\t"
8790 8782 "IDIV $div\n\t"
8791 8783 "done:" %}
8792 8784 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8793 8785 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8794 8786 ins_pipe( ialu_reg_reg_alu0 );
8795 8787 %}
8796 8788
8797 8789 // Divide Register Long
// No 64-bit divide instruction on 32-bit x86: call the SharedRuntime::ldiv
// runtime stub with both longs pushed on the stack.
8798 8790 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8799 8791 match(Set dst (DivL src1 src2));
8800 8792 effect( KILL cr, KILL cx, KILL bx );
8801 8793 ins_cost(10000);
8802 8794 format %{ "PUSH $src1.hi\n\t"
8803 8795 "PUSH $src1.lo\n\t"
8804 8796 "PUSH $src2.hi\n\t"
8805 8797 "PUSH $src2.lo\n\t"
8806 8798 "CALL SharedRuntime::ldiv\n\t"
8807 8799 "ADD ESP,16" %}
8808 8800 ins_encode( long_div(src1,src2) );
8809 8801 ins_pipe( pipe_slow );
8810 8802 %}
8811 8803
8812 8804 // Integer DIVMOD with Register, both quotient and mod results
// Single IDIV producing quotient in EAX and remainder in EDX, with the
// same min_jint / -1 guard as divI_eReg.
8813 8805 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8814 8806 match(DivModI rax div);
8815 8807 effect(KILL cr);
8816 8808 size(26);
8817 8809 ins_cost(30*100+10*100);
8818 8810 format %{ "CMP EAX,0x80000000\n\t"
8819 8811 "JNE,s normal\n\t"
8820 8812 "XOR EDX,EDX\n\t"
8821 8813 "CMP ECX,-1\n\t"
8822 8814 "JE,s done\n"
8823 8815 "normal: CDQ\n\t"
8824 8816 "IDIV $div\n\t"
8825 8817 "done:" %}
8826 8818 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8827 8819 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8828 8820 ins_pipe( pipe_slow );
8829 8821 %}
8830 8822
8831 8823 // Integer MOD with Register
// Remainder via CDQ + IDIV; result is the EDX half.
8832 8824 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8833 8825 match(Set rdx (ModI rax div));
8834 8826 effect(KILL rax, KILL cr);
8835 8827
8836 8828 size(26);
8837 8829 ins_cost(300);
8838 8830 format %{ "CDQ\n\t"
8839 8831 "IDIV $div" %}
8840 8832 opcode(0xF7, 0x7); /* Opcode F7 /7 */
8841 8833 ins_encode( cdq_enc, OpcP, RegOpc(div) );
8842 8834 ins_pipe( ialu_reg_reg_alu0 );
8843 8835 %}
8844 8836
8845 8837 // Remainder Register Long
// Like divL_eReg: call the SharedRuntime::lrem runtime stub.
8846 8838 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8847 8839 match(Set dst (ModL src1 src2));
8848 8840 effect( KILL cr, KILL cx, KILL bx );
8849 8841 ins_cost(10000);
8850 8842 format %{ "PUSH $src1.hi\n\t"
8851 8843 "PUSH $src1.lo\n\t"
8852 8844 "PUSH $src2.hi\n\t"
8853 8845 "PUSH $src2.lo\n\t"
8854 8846 "CALL SharedRuntime::lrem\n\t"
8855 8847 "ADD ESP,16" %}
8856 8848 ins_encode( long_mod(src1,src2) );
8857 8849 ins_pipe( pipe_slow );
8858 8850 %}
8859 8851
8860 8852 // Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, inline instead of a runtime call.
// Strategy: divide by |imm| using unsigned DIV (fast path when the high
// word is already smaller than the divisor), negating dividend/result as
// needed to recover signed semantics.
8861 8853 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8862 8854 match(Set dst (DivL dst imm));
8863 8855 effect( TEMP tmp, TEMP tmp2, KILL cr );
8864 8856 ins_cost(1000);
8865 8857 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866 8858 "XOR $tmp2,$tmp2\n\t"
8867 8859 "CMP $tmp,EDX\n\t"
8868 8860 "JA,s fast\n\t"
8869 8861 "MOV $tmp2,EAX\n\t"
8870 8862 "MOV EAX,EDX\n\t"
8871 8863 "MOV EDX,0\n\t"
8872 8864 "JLE,s pos\n\t"
8873 8865 "LNEG EAX : $tmp2\n\t"
8874 8866 "DIV $tmp # unsigned division\n\t"
8875 8867 "XCHG EAX,$tmp2\n\t"
8876 8868 "DIV $tmp\n\t"
8877 8869 "LNEG $tmp2 : EAX\n\t"
8878 8870 "JMP,s done\n"
8879 8871 "pos:\n\t"
8880 8872 "DIV $tmp\n\t"
8881 8873 "XCHG EAX,$tmp2\n"
8882 8874 "fast:\n\t"
8883 8875 "DIV $tmp\n"
8884 8876 "done:\n\t"
8885 8877 "MOV EDX,$tmp2\n\t"
8886 8878 "NEG EDX:EAX # if $imm < 0" %}
8887 8879 ins_encode %{
8888 8880 int con = (int)$imm$$constant;
8889 8881 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8890 8882 int pcon = (con > 0) ? con : -con;
8891 8883 Label Lfast, Lpos, Ldone;
8892 8884
8893 8885 __ movl($tmp$$Register, pcon);
8894 8886 __ xorl($tmp2$$Register,$tmp2$$Register);
8895 8887 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8896 8888 __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8897 8889
8898 8890 __ movl($tmp2$$Register, $dst$$Register); // save
8899 8891 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8900 8892 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8901 8893 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8902 8894
8903 8895 // Negative dividend.
8904 8896 // convert value to positive to use unsigned division
8905 8897 __ lneg($dst$$Register, $tmp2$$Register);
8906 8898 __ divl($tmp$$Register);
8907 8899 __ xchgl($dst$$Register, $tmp2$$Register);
8908 8900 __ divl($tmp$$Register);
8909 8901 // revert result back to negative
8910 8902 __ lneg($tmp2$$Register, $dst$$Register);
8911 8903 __ jmpb(Ldone);
8912 8904
8913 8905 __ bind(Lpos);
8914 8906 __ divl($tmp$$Register); // Use unsigned division
8915 8907 __ xchgl($dst$$Register, $tmp2$$Register);
8916 8908 // Fallthrow for final divide, tmp2 has 32 bit hi result
8917 8909
8918 8910 __ bind(Lfast);
8919 8911 // fast path: src is positive
8920 8912 __ divl($tmp$$Register); // Use unsigned division
8921 8913
8922 8914 __ bind(Ldone);
8923 8915 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8924 8916 if (con < 0) {
8925 8917 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8926 8918 }
8927 8919 %}
8928 8920 ins_pipe( pipe_slow );
8929 8921 %}
8930 8922
8931 8923 // Remainder Register Long (remainder fit into 32 bits)
8932 8924 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8933 8925 match(Set dst (ModL dst imm));
8934 8926 effect( TEMP tmp, TEMP tmp2, KILL cr );
8935 8927 ins_cost(1000);
8936 8928 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8937 8929 "CMP $tmp,EDX\n\t"
8938 8930 "JA,s fast\n\t"
8939 8931 "MOV $tmp2,EAX\n\t"
8940 8932 "MOV EAX,EDX\n\t"
8941 8933 "MOV EDX,0\n\t"
8942 8934 "JLE,s pos\n\t"
8943 8935 "LNEG EAX : $tmp2\n\t"
8944 8936 "DIV $tmp # unsigned division\n\t"
8945 8937 "MOV EAX,$tmp2\n\t"
8946 8938 "DIV $tmp\n\t"
8947 8939 "NEG EDX\n\t"
8948 8940 "JMP,s done\n"
8949 8941 "pos:\n\t"
8950 8942 "DIV $tmp\n\t"
8951 8943 "MOV EAX,$tmp2\n"
8952 8944 "fast:\n\t"
8953 8945 "DIV $tmp\n"
8954 8946 "done:\n\t"
8955 8947 "MOV EAX,EDX\n\t"
8956 8948 "SAR EDX,31\n\t" %}
8957 8949 ins_encode %{
8958 8950 int con = (int)$imm$$constant;
8959 8951 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8960 8952 int pcon = (con > 0) ? con : -con;
8961 8953 Label Lfast, Lpos, Ldone;
8962 8954
8963 8955 __ movl($tmp$$Register, pcon);
8964 8956 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8965 8957 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8966 8958
8967 8959 __ movl($tmp2$$Register, $dst$$Register); // save
8968 8960 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8969 8961 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8970 8962 __ jccb(Assembler::lessEqual, Lpos); // result is positive
8971 8963
8972 8964 // Negative dividend.
8973 8965 // convert value to positive to use unsigned division
8974 8966 __ lneg($dst$$Register, $tmp2$$Register);
8975 8967 __ divl($tmp$$Register);
8976 8968 __ movl($dst$$Register, $tmp2$$Register);
8977 8969 __ divl($tmp$$Register);
8978 8970 // revert remainder back to negative
8979 8971 __ negl(HIGH_FROM_LOW($dst$$Register));
8980 8972 __ jmpb(Ldone);
8981 8973
8982 8974 __ bind(Lpos);
8983 8975 __ divl($tmp$$Register);
8984 8976 __ movl($dst$$Register, $tmp2$$Register);
8985 8977
8986 8978 __ bind(Lfast);
8987 8979 // fast path: src is positive
8988 8980 __ divl($tmp$$Register);
8989 8981
8990 8982 __ bind(Ldone);
8991 8983 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8992 8984 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8993 8985
8994 8986 %}
8995 8987 ins_pipe( pipe_slow );
8996 8988 %}
8997 8989
8998 8990 // Integer Shift Instructions
8999 8991 // Shift Left by one
9000 8992 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9001 8993 match(Set dst (LShiftI dst shift));
9002 8994 effect(KILL cr);
9003 8995
9004 8996 size(2);
9005 8997 format %{ "SHL $dst,$shift" %}
9006 8998 opcode(0xD1, 0x4); /* D1 /4 */
9007 8999 ins_encode( OpcP, RegOpc( dst ) );
9008 9000 ins_pipe( ialu_reg );
9009 9001 %}
9010 9002
9011 9003 // Shift Left by 8-bit immediate
9012 9004 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9013 9005 match(Set dst (LShiftI dst shift));
9014 9006 effect(KILL cr);
9015 9007
9016 9008 size(3);
9017 9009 format %{ "SHL $dst,$shift" %}
9018 9010 opcode(0xC1, 0x4); /* C1 /4 ib */
9019 9011 ins_encode( RegOpcImm( dst, shift) );
9020 9012 ins_pipe( ialu_reg );
9021 9013 %}
9022 9014
9023 9015 // Shift Left by variable
9024 9016 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9025 9017 match(Set dst (LShiftI dst shift));
9026 9018 effect(KILL cr);
9027 9019
9028 9020 size(2);
9029 9021 format %{ "SHL $dst,$shift" %}
9030 9022 opcode(0xD3, 0x4); /* D3 /4 */
9031 9023 ins_encode( OpcP, RegOpc( dst ) );
9032 9024 ins_pipe( ialu_reg_reg );
9033 9025 %}
9034 9026
9035 9027 // Arithmetic shift right by one
9036 9028 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9037 9029 match(Set dst (RShiftI dst shift));
9038 9030 effect(KILL cr);
9039 9031
9040 9032 size(2);
9041 9033 format %{ "SAR $dst,$shift" %}
9042 9034 opcode(0xD1, 0x7); /* D1 /7 */
9043 9035 ins_encode( OpcP, RegOpc( dst ) );
9044 9036 ins_pipe( ialu_reg );
9045 9037 %}
9046 9038
9047 9039 // Arithmetic shift right by one
9048 9040 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
9049 9041 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9050 9042 effect(KILL cr);
9051 9043 format %{ "SAR $dst,$shift" %}
9052 9044 opcode(0xD1, 0x7); /* D1 /7 */
9053 9045 ins_encode( OpcP, RMopc_Mem(secondary,dst) );
9054 9046 ins_pipe( ialu_mem_imm );
9055 9047 %}
9056 9048
9057 9049 // Arithmetic Shift Right by 8-bit immediate
9058 9050 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9059 9051 match(Set dst (RShiftI dst shift));
9060 9052 effect(KILL cr);
9061 9053
9062 9054 size(3);
9063 9055 format %{ "SAR $dst,$shift" %}
9064 9056 opcode(0xC1, 0x7); /* C1 /7 ib */
9065 9057 ins_encode( RegOpcImm( dst, shift ) );
9066 9058 ins_pipe( ialu_mem_imm );
9067 9059 %}
9068 9060
9069 9061 // Arithmetic Shift Right by 8-bit immediate
9070 9062 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
9071 9063 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9072 9064 effect(KILL cr);
9073 9065
9074 9066 format %{ "SAR $dst,$shift" %}
9075 9067 opcode(0xC1, 0x7); /* C1 /7 ib */
9076 9068 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
9077 9069 ins_pipe( ialu_mem_imm );
9078 9070 %}
9079 9071
9080 9072 // Arithmetic Shift Right by variable
9081 9073 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9082 9074 match(Set dst (RShiftI dst shift));
9083 9075 effect(KILL cr);
9084 9076
9085 9077 size(2);
9086 9078 format %{ "SAR $dst,$shift" %}
9087 9079 opcode(0xD3, 0x7); /* D3 /7 */
9088 9080 ins_encode( OpcP, RegOpc( dst ) );
9089 9081 ins_pipe( ialu_reg_reg );
9090 9082 %}
9091 9083
9092 9084 // Logical shift right by one
9093 9085 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9094 9086 match(Set dst (URShiftI dst shift));
9095 9087 effect(KILL cr);
9096 9088
9097 9089 size(2);
9098 9090 format %{ "SHR $dst,$shift" %}
9099 9091 opcode(0xD1, 0x5); /* D1 /5 */
9100 9092 ins_encode( OpcP, RegOpc( dst ) );
9101 9093 ins_pipe( ialu_reg );
9102 9094 %}
9103 9095
9104 9096 // Logical Shift Right by 8-bit immediate
9105 9097 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9106 9098 match(Set dst (URShiftI dst shift));
9107 9099 effect(KILL cr);
9108 9100
9109 9101 size(3);
9110 9102 format %{ "SHR $dst,$shift" %}
9111 9103 opcode(0xC1, 0x5); /* C1 /5 ib */
9112 9104 ins_encode( RegOpcImm( dst, shift) );
9113 9105 ins_pipe( ialu_reg );
9114 9106 %}
9115 9107
9116 9108
9117 9109 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
9118 9110 // This idiom is used by the compiler for the i2b bytecode.
9119 9111 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
9120 9112 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9121 9113
9122 9114 size(3);
9123 9115 format %{ "MOVSX $dst,$src :8" %}
9124 9116 ins_encode %{
9125 9117 __ movsbl($dst$$Register, $src$$Register);
9126 9118 %}
9127 9119 ins_pipe(ialu_reg_reg);
9128 9120 %}
9129 9121
9130 9122 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
9131 9123 // This idiom is used by the compiler the i2s bytecode.
9132 9124 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
9133 9125 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9134 9126
9135 9127 size(3);
9136 9128 format %{ "MOVSX $dst,$src :16" %}
9137 9129 ins_encode %{
9138 9130 __ movswl($dst$$Register, $src$$Register);
9139 9131 %}
9140 9132 ins_pipe(ialu_reg_reg);
9141 9133 %}
9142 9134
9143 9135
9144 9136 // Logical Shift Right by variable
9145 9137 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9146 9138 match(Set dst (URShiftI dst shift));
9147 9139 effect(KILL cr);
9148 9140
9149 9141 size(2);
9150 9142 format %{ "SHR $dst,$shift" %}
9151 9143 opcode(0xD3, 0x5); /* D3 /5 */
9152 9144 ins_encode( OpcP, RegOpc( dst ) );
9153 9145 ins_pipe( ialu_reg_reg );
9154 9146 %}
9155 9147
9156 9148
9157 9149 //----------Logical Instructions-----------------------------------------------
9158 9150 //----------Integer Logical Instructions---------------------------------------
9159 9151 // And Instructions
9160 9152 // And Register with Register
9161 9153 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9162 9154 match(Set dst (AndI dst src));
9163 9155 effect(KILL cr);
9164 9156
9165 9157 size(2);
9166 9158 format %{ "AND $dst,$src" %}
9167 9159 opcode(0x23);
9168 9160 ins_encode( OpcP, RegReg( dst, src) );
9169 9161 ins_pipe( ialu_reg_reg );
9170 9162 %}
9171 9163
9172 9164 // And Register with Immediate
9173 9165 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9174 9166 match(Set dst (AndI dst src));
9175 9167 effect(KILL cr);
9176 9168
9177 9169 format %{ "AND $dst,$src" %}
9178 9170 opcode(0x81,0x04); /* Opcode 81 /4 */
9179 9171 // ins_encode( RegImm( dst, src) );
9180 9172 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9181 9173 ins_pipe( ialu_reg );
9182 9174 %}
9183 9175
9184 9176 // And Register with Memory
9185 9177 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9186 9178 match(Set dst (AndI dst (LoadI src)));
9187 9179 effect(KILL cr);
9188 9180
9189 9181 ins_cost(125);
9190 9182 format %{ "AND $dst,$src" %}
9191 9183 opcode(0x23);
9192 9184 ins_encode( OpcP, RegMem( dst, src) );
9193 9185 ins_pipe( ialu_reg_mem );
9194 9186 %}
9195 9187
9196 9188 // And Memory with Register
9197 9189 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9198 9190 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9199 9191 effect(KILL cr);
9200 9192
9201 9193 ins_cost(150);
9202 9194 format %{ "AND $dst,$src" %}
9203 9195 opcode(0x21); /* Opcode 21 /r */
9204 9196 ins_encode( OpcP, RegMem( src, dst ) );
9205 9197 ins_pipe( ialu_mem_reg );
9206 9198 %}
9207 9199
9208 9200 // And Memory with Immediate
9209 9201 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9210 9202 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9211 9203 effect(KILL cr);
9212 9204
9213 9205 ins_cost(125);
9214 9206 format %{ "AND $dst,$src" %}
9215 9207 opcode(0x81, 0x4); /* Opcode 81 /4 id */
9216 9208 // ins_encode( MemImm( dst, src) );
9217 9209 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9218 9210 ins_pipe( ialu_mem_imm );
9219 9211 %}
9220 9212
9221 9213 // Or Instructions
9222 9214 // Or Register with Register
9223 9215 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9224 9216 match(Set dst (OrI dst src));
9225 9217 effect(KILL cr);
9226 9218
9227 9219 size(2);
9228 9220 format %{ "OR $dst,$src" %}
9229 9221 opcode(0x0B);
9230 9222 ins_encode( OpcP, RegReg( dst, src) );
9231 9223 ins_pipe( ialu_reg_reg );
9232 9224 %}
9233 9225
9234 9226 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
9235 9227 match(Set dst (OrI dst (CastP2X src)));
9236 9228 effect(KILL cr);
9237 9229
9238 9230 size(2);
9239 9231 format %{ "OR $dst,$src" %}
9240 9232 opcode(0x0B);
9241 9233 ins_encode( OpcP, RegReg( dst, src) );
9242 9234 ins_pipe( ialu_reg_reg );
9243 9235 %}
9244 9236
9245 9237
9246 9238 // Or Register with Immediate
9247 9239 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9248 9240 match(Set dst (OrI dst src));
9249 9241 effect(KILL cr);
9250 9242
9251 9243 format %{ "OR $dst,$src" %}
9252 9244 opcode(0x81,0x01); /* Opcode 81 /1 id */
9253 9245 // ins_encode( RegImm( dst, src) );
9254 9246 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9255 9247 ins_pipe( ialu_reg );
9256 9248 %}
9257 9249
9258 9250 // Or Register with Memory
9259 9251 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9260 9252 match(Set dst (OrI dst (LoadI src)));
9261 9253 effect(KILL cr);
9262 9254
9263 9255 ins_cost(125);
9264 9256 format %{ "OR $dst,$src" %}
9265 9257 opcode(0x0B);
9266 9258 ins_encode( OpcP, RegMem( dst, src) );
9267 9259 ins_pipe( ialu_reg_mem );
9268 9260 %}
9269 9261
9270 9262 // Or Memory with Register
9271 9263 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9272 9264 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9273 9265 effect(KILL cr);
9274 9266
9275 9267 ins_cost(150);
9276 9268 format %{ "OR $dst,$src" %}
9277 9269 opcode(0x09); /* Opcode 09 /r */
9278 9270 ins_encode( OpcP, RegMem( src, dst ) );
9279 9271 ins_pipe( ialu_mem_reg );
9280 9272 %}
9281 9273
9282 9274 // Or Memory with Immediate
9283 9275 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9284 9276 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9285 9277 effect(KILL cr);
9286 9278
9287 9279 ins_cost(125);
9288 9280 format %{ "OR $dst,$src" %}
9289 9281 opcode(0x81,0x1); /* Opcode 81 /1 id */
9290 9282 // ins_encode( MemImm( dst, src) );
9291 9283 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9292 9284 ins_pipe( ialu_mem_imm );
9293 9285 %}
9294 9286
9295 9287 // ROL/ROR
9296 9288 // ROL expand
9297 9289 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9298 9290 effect(USE_DEF dst, USE shift, KILL cr);
9299 9291
9300 9292 format %{ "ROL $dst, $shift" %}
9301 9293 opcode(0xD1, 0x0); /* Opcode D1 /0 */
9302 9294 ins_encode( OpcP, RegOpc( dst ));
9303 9295 ins_pipe( ialu_reg );
9304 9296 %}
9305 9297
9306 9298 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9307 9299 effect(USE_DEF dst, USE shift, KILL cr);
9308 9300
9309 9301 format %{ "ROL $dst, $shift" %}
9310 9302 opcode(0xC1, 0x0); /*Opcode /C1 /0 */
9311 9303 ins_encode( RegOpcImm(dst, shift) );
9312 9304 ins_pipe(ialu_reg);
9313 9305 %}
9314 9306
9315 9307 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
9316 9308 effect(USE_DEF dst, USE shift, KILL cr);
9317 9309
9318 9310 format %{ "ROL $dst, $shift" %}
9319 9311 opcode(0xD3, 0x0); /* Opcode D3 /0 */
9320 9312 ins_encode(OpcP, RegOpc(dst));
9321 9313 ins_pipe( ialu_reg_reg );
9322 9314 %}
9323 9315 // end of ROL expand
9324 9316
9325 9317 // ROL 32bit by one once
9326 9318 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
9327 9319 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9328 9320
9329 9321 expand %{
9330 9322 rolI_eReg_imm1(dst, lshift, cr);
9331 9323 %}
9332 9324 %}
9333 9325
9334 9326 // ROL 32bit var by imm8 once
9335 9327 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
9336 9328 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9337 9329 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9338 9330
9339 9331 expand %{
9340 9332 rolI_eReg_imm8(dst, lshift, cr);
9341 9333 %}
9342 9334 %}
9343 9335
9344 9336 // ROL 32bit var by var once
9345 9337 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9346 9338 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9347 9339
9348 9340 expand %{
9349 9341 rolI_eReg_CL(dst, shift, cr);
9350 9342 %}
9351 9343 %}
9352 9344
9353 9345 // ROL 32bit var by var once
9354 9346 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9355 9347 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9356 9348
9357 9349 expand %{
9358 9350 rolI_eReg_CL(dst, shift, cr);
9359 9351 %}
9360 9352 %}
9361 9353
9362 9354 // ROR expand
9363 9355 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9364 9356 effect(USE_DEF dst, USE shift, KILL cr);
9365 9357
9366 9358 format %{ "ROR $dst, $shift" %}
9367 9359 opcode(0xD1,0x1); /* Opcode D1 /1 */
9368 9360 ins_encode( OpcP, RegOpc( dst ) );
9369 9361 ins_pipe( ialu_reg );
9370 9362 %}
9371 9363
9372 9364 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
9373 9365 effect (USE_DEF dst, USE shift, KILL cr);
9374 9366
9375 9367 format %{ "ROR $dst, $shift" %}
9376 9368 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
9377 9369 ins_encode( RegOpcImm(dst, shift) );
9378 9370 ins_pipe( ialu_reg );
9379 9371 %}
9380 9372
9381 9373 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
9382 9374 effect(USE_DEF dst, USE shift, KILL cr);
9383 9375
9384 9376 format %{ "ROR $dst, $shift" %}
9385 9377 opcode(0xD3, 0x1); /* Opcode D3 /1 */
9386 9378 ins_encode(OpcP, RegOpc(dst));
9387 9379 ins_pipe( ialu_reg_reg );
9388 9380 %}
9389 9381 // end of ROR expand
9390 9382
9391 9383 // ROR right once
9392 9384 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
9393 9385 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9394 9386
9395 9387 expand %{
9396 9388 rorI_eReg_imm1(dst, rshift, cr);
9397 9389 %}
9398 9390 %}
9399 9391
9400 9392 // ROR 32bit by immI8 once
9401 9393 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
9402 9394 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9403 9395 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9404 9396
9405 9397 expand %{
9406 9398 rorI_eReg_imm8(dst, rshift, cr);
9407 9399 %}
9408 9400 %}
9409 9401
9410 9402 // ROR 32bit var by var once
9411 9403 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9412 9404 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9413 9405
9414 9406 expand %{
9415 9407 rorI_eReg_CL(dst, shift, cr);
9416 9408 %}
9417 9409 %}
9418 9410
9419 9411 // ROR 32bit var by var once
9420 9412 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9421 9413 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9422 9414
9423 9415 expand %{
9424 9416 rorI_eReg_CL(dst, shift, cr);
9425 9417 %}
9426 9418 %}
9427 9419
9428 9420 // Xor Instructions
9429 9421 // Xor Register with Register
9430 9422 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9431 9423 match(Set dst (XorI dst src));
9432 9424 effect(KILL cr);
9433 9425
9434 9426 size(2);
9435 9427 format %{ "XOR $dst,$src" %}
9436 9428 opcode(0x33);
9437 9429 ins_encode( OpcP, RegReg( dst, src) );
9438 9430 ins_pipe( ialu_reg_reg );
9439 9431 %}
9440 9432
9441 9433 // Xor Register with Immediate -1
9442 9434 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9443 9435 match(Set dst (XorI dst imm));
9444 9436
9445 9437 size(2);
9446 9438 format %{ "NOT $dst" %}
9447 9439 ins_encode %{
9448 9440 __ notl($dst$$Register);
9449 9441 %}
9450 9442 ins_pipe( ialu_reg );
9451 9443 %}
9452 9444
9453 9445 // Xor Register with Immediate
9454 9446 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9455 9447 match(Set dst (XorI dst src));
9456 9448 effect(KILL cr);
9457 9449
9458 9450 format %{ "XOR $dst,$src" %}
9459 9451 opcode(0x81,0x06); /* Opcode 81 /6 id */
9460 9452 // ins_encode( RegImm( dst, src) );
9461 9453 ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9462 9454 ins_pipe( ialu_reg );
9463 9455 %}
9464 9456
9465 9457 // Xor Register with Memory
9466 9458 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9467 9459 match(Set dst (XorI dst (LoadI src)));
9468 9460 effect(KILL cr);
9469 9461
9470 9462 ins_cost(125);
9471 9463 format %{ "XOR $dst,$src" %}
9472 9464 opcode(0x33);
9473 9465 ins_encode( OpcP, RegMem(dst, src) );
9474 9466 ins_pipe( ialu_reg_mem );
9475 9467 %}
9476 9468
9477 9469 // Xor Memory with Register
9478 9470 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9479 9471 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9480 9472 effect(KILL cr);
9481 9473
9482 9474 ins_cost(150);
9483 9475 format %{ "XOR $dst,$src" %}
9484 9476 opcode(0x31); /* Opcode 31 /r */
9485 9477 ins_encode( OpcP, RegMem( src, dst ) );
9486 9478 ins_pipe( ialu_mem_reg );
9487 9479 %}
9488 9480
9489 9481 // Xor Memory with Immediate
9490 9482 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9491 9483 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9492 9484 effect(KILL cr);
9493 9485
9494 9486 ins_cost(125);
9495 9487 format %{ "XOR $dst,$src" %}
9496 9488 opcode(0x81,0x6); /* Opcode 81 /6 id */
9497 9489 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9498 9490 ins_pipe( ialu_mem_imm );
9499 9491 %}
9500 9492
9501 9493 //----------Convert Int to Boolean---------------------------------------------
9502 9494
9503 9495 instruct movI_nocopy(eRegI dst, eRegI src) %{
9504 9496 effect( DEF dst, USE src );
9505 9497 format %{ "MOV $dst,$src" %}
9506 9498 ins_encode( enc_Copy( dst, src) );
9507 9499 ins_pipe( ialu_reg_reg );
9508 9500 %}
9509 9501
9510 9502 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
9511 9503 effect( USE_DEF dst, USE src, KILL cr );
9512 9504
9513 9505 size(4);
9514 9506 format %{ "NEG $dst\n\t"
9515 9507 "ADC $dst,$src" %}
9516 9508 ins_encode( neg_reg(dst),
9517 9509 OpcRegReg(0x13,dst,src) );
9518 9510 ins_pipe( ialu_reg_reg_long );
9519 9511 %}
9520 9512
9521 9513 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
9522 9514 match(Set dst (Conv2B src));
9523 9515
9524 9516 expand %{
9525 9517 movI_nocopy(dst,src);
9526 9518 ci2b(dst,src,cr);
9527 9519 %}
9528 9520 %}
9529 9521
9530 9522 instruct movP_nocopy(eRegI dst, eRegP src) %{
9531 9523 effect( DEF dst, USE src );
9532 9524 format %{ "MOV $dst,$src" %}
9533 9525 ins_encode( enc_Copy( dst, src) );
9534 9526 ins_pipe( ialu_reg_reg );
9535 9527 %}
9536 9528
9537 9529 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
9538 9530 effect( USE_DEF dst, USE src, KILL cr );
9539 9531 format %{ "NEG $dst\n\t"
9540 9532 "ADC $dst,$src" %}
9541 9533 ins_encode( neg_reg(dst),
9542 9534 OpcRegReg(0x13,dst,src) );
9543 9535 ins_pipe( ialu_reg_reg_long );
9544 9536 %}
9545 9537
9546 9538 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9547 9539 match(Set dst (Conv2B src));
9548 9540
9549 9541 expand %{
9550 9542 movP_nocopy(dst,src);
9551 9543 cp2b(dst,src,cr);
9552 9544 %}
9553 9545 %}
9554 9546
9555 9547 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9556 9548 match(Set dst (CmpLTMask p q));
9557 9549 effect( KILL cr );
9558 9550 ins_cost(400);
9559 9551
9560 9552 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9561 9553 format %{ "XOR $dst,$dst\n\t"
9562 9554 "CMP $p,$q\n\t"
9563 9555 "SETlt $dst\n\t"
9564 9556 "NEG $dst" %}
9565 9557 ins_encode( OpcRegReg(0x33,dst,dst),
9566 9558 OpcRegReg(0x3B,p,q),
9567 9559 setLT_reg(dst), neg_reg(dst) );
9568 9560 ins_pipe( pipe_slow );
9569 9561 %}
9570 9562
9571 9563 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9572 9564 match(Set dst (CmpLTMask dst zero));
9573 9565 effect( DEF dst, KILL cr );
9574 9566 ins_cost(100);
9575 9567
9576 9568 format %{ "SAR $dst,31" %}
9577 9569 opcode(0xC1, 0x7); /* C1 /7 ib */
9578 9570 ins_encode( RegOpcImm( dst, 0x1F ) );
9579 9571 ins_pipe( ialu_reg );
9580 9572 %}
9581 9573
9582 9574
9583 9575 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9584 9576 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9585 9577 effect( KILL tmp, KILL cr );
9586 9578 ins_cost(400);
9587 9579 // annoyingly, $tmp has no edges so you cant ask for it in
9588 9580 // any format or encoding
9589 9581 format %{ "SUB $p,$q\n\t"
9590 9582 "SBB ECX,ECX\n\t"
9591 9583 "AND ECX,$y\n\t"
9592 9584 "ADD $p,ECX" %}
9593 9585 ins_encode( enc_cmpLTP(p,q,y,tmp) );
9594 9586 ins_pipe( pipe_cmplt );
9595 9587 %}
9596 9588
9597 9589 /* If I enable this, I encourage spilling in the inner loop of compress.
9598 9590 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9599 9591 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9600 9592 effect( USE_KILL tmp, KILL cr );
9601 9593 ins_cost(400);
9602 9594
9603 9595 format %{ "SUB $p,$q\n\t"
9604 9596 "SBB ECX,ECX\n\t"
9605 9597 "AND ECX,$y\n\t"
9606 9598 "ADD $p,ECX" %}
9607 9599 ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9608 9600 %}
9609 9601 */
9610 9602
9611 9603 //----------Long Instructions------------------------------------------------
9612 9604 // Add Long Register with Register
9613 9605 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9614 9606 match(Set dst (AddL dst src));
9615 9607 effect(KILL cr);
9616 9608 ins_cost(200);
9617 9609 format %{ "ADD $dst.lo,$src.lo\n\t"
9618 9610 "ADC $dst.hi,$src.hi" %}
9619 9611 opcode(0x03, 0x13);
9620 9612 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9621 9613 ins_pipe( ialu_reg_reg_long );
9622 9614 %}
9623 9615
9624 9616 // Add Long Register with Immediate
9625 9617 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9626 9618 match(Set dst (AddL dst src));
9627 9619 effect(KILL cr);
9628 9620 format %{ "ADD $dst.lo,$src.lo\n\t"
9629 9621 "ADC $dst.hi,$src.hi" %}
9630 9622 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
9631 9623 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9632 9624 ins_pipe( ialu_reg_long );
9633 9625 %}
9634 9626
9635 9627 // Add Long Register with Memory
9636 9628 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9637 9629 match(Set dst (AddL dst (LoadL mem)));
9638 9630 effect(KILL cr);
9639 9631 ins_cost(125);
9640 9632 format %{ "ADD $dst.lo,$mem\n\t"
9641 9633 "ADC $dst.hi,$mem+4" %}
9642 9634 opcode(0x03, 0x13);
9643 9635 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9644 9636 ins_pipe( ialu_reg_long_mem );
9645 9637 %}
9646 9638
9647 9639 // Subtract Long Register with Register.
9648 9640 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9649 9641 match(Set dst (SubL dst src));
9650 9642 effect(KILL cr);
9651 9643 ins_cost(200);
9652 9644 format %{ "SUB $dst.lo,$src.lo\n\t"
9653 9645 "SBB $dst.hi,$src.hi" %}
9654 9646 opcode(0x2B, 0x1B);
9655 9647 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9656 9648 ins_pipe( ialu_reg_reg_long );
9657 9649 %}
9658 9650
9659 9651 // Subtract Long Register with Immediate
9660 9652 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9661 9653 match(Set dst (SubL dst src));
9662 9654 effect(KILL cr);
9663 9655 format %{ "SUB $dst.lo,$src.lo\n\t"
9664 9656 "SBB $dst.hi,$src.hi" %}
9665 9657 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
9666 9658 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9667 9659 ins_pipe( ialu_reg_long );
9668 9660 %}
9669 9661
9670 9662 // Subtract Long Register with Memory
9671 9663 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9672 9664 match(Set dst (SubL dst (LoadL mem)));
9673 9665 effect(KILL cr);
9674 9666 ins_cost(125);
9675 9667 format %{ "SUB $dst.lo,$mem\n\t"
9676 9668 "SBB $dst.hi,$mem+4" %}
9677 9669 opcode(0x2B, 0x1B);
9678 9670 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9679 9671 ins_pipe( ialu_reg_long_mem );
9680 9672 %}
9681 9673
9682 9674 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9683 9675 match(Set dst (SubL zero dst));
9684 9676 effect(KILL cr);
9685 9677 ins_cost(300);
9686 9678 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
9687 9679 ins_encode( neg_long(dst) );
9688 9680 ins_pipe( ialu_reg_reg_long );
9689 9681 %}
9690 9682
9691 9683 // And Long Register with Register
9692 9684 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9693 9685 match(Set dst (AndL dst src));
9694 9686 effect(KILL cr);
9695 9687 format %{ "AND $dst.lo,$src.lo\n\t"
9696 9688 "AND $dst.hi,$src.hi" %}
9697 9689 opcode(0x23,0x23);
9698 9690 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9699 9691 ins_pipe( ialu_reg_reg_long );
9700 9692 %}
9701 9693
9702 9694 // And Long Register with Immediate
9703 9695 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9704 9696 match(Set dst (AndL dst src));
9705 9697 effect(KILL cr);
9706 9698 format %{ "AND $dst.lo,$src.lo\n\t"
9707 9699 "AND $dst.hi,$src.hi" %}
9708 9700 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
9709 9701 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9710 9702 ins_pipe( ialu_reg_long );
9711 9703 %}
9712 9704
9713 9705 // And Long Register with Memory
9714 9706 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9715 9707 match(Set dst (AndL dst (LoadL mem)));
9716 9708 effect(KILL cr);
9717 9709 ins_cost(125);
9718 9710 format %{ "AND $dst.lo,$mem\n\t"
9719 9711 "AND $dst.hi,$mem+4" %}
9720 9712 opcode(0x23, 0x23);
9721 9713 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9722 9714 ins_pipe( ialu_reg_long_mem );
9723 9715 %}
9724 9716
9725 9717 // Or Long Register with Register
9726 9718 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9727 9719 match(Set dst (OrL dst src));
9728 9720 effect(KILL cr);
9729 9721 format %{ "OR $dst.lo,$src.lo\n\t"
9730 9722 "OR $dst.hi,$src.hi" %}
9731 9723 opcode(0x0B,0x0B);
9732 9724 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9733 9725 ins_pipe( ialu_reg_reg_long );
9734 9726 %}
9735 9727
9736 9728 // Or Long Register with Immediate
9737 9729 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9738 9730 match(Set dst (OrL dst src));
9739 9731 effect(KILL cr);
9740 9732 format %{ "OR $dst.lo,$src.lo\n\t"
9741 9733 "OR $dst.hi,$src.hi" %}
9742 9734 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9743 9735 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9744 9736 ins_pipe( ialu_reg_long );
9745 9737 %}
9746 9738
9747 9739 // Or Long Register with Memory
9748 9740 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9749 9741 match(Set dst (OrL dst (LoadL mem)));
9750 9742 effect(KILL cr);
9751 9743 ins_cost(125);
9752 9744 format %{ "OR $dst.lo,$mem\n\t"
9753 9745 "OR $dst.hi,$mem+4" %}
9754 9746 opcode(0x0B,0x0B);
9755 9747 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9756 9748 ins_pipe( ialu_reg_long_mem );
9757 9749 %}
9758 9750
9759 9751 // Xor Long Register with Register
9760 9752 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9761 9753 match(Set dst (XorL dst src));
9762 9754 effect(KILL cr);
9763 9755 format %{ "XOR $dst.lo,$src.lo\n\t"
9764 9756 "XOR $dst.hi,$src.hi" %}
9765 9757 opcode(0x33,0x33);
9766 9758 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9767 9759 ins_pipe( ialu_reg_reg_long );
9768 9760 %}
9769 9761
9770 9762 // Xor Long Register with Immediate -1
9771 9763 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9772 9764 match(Set dst (XorL dst imm));
9773 9765 format %{ "NOT $dst.lo\n\t"
9774 9766 "NOT $dst.hi" %}
9775 9767 ins_encode %{
9776 9768 __ notl($dst$$Register);
9777 9769 __ notl(HIGH_FROM_LOW($dst$$Register));
9778 9770 %}
9779 9771 ins_pipe( ialu_reg_long );
9780 9772 %}
9781 9773
9782 9774 // Xor Long Register with Immediate
9783 9775 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9784 9776 match(Set dst (XorL dst src));
9785 9777 effect(KILL cr);
9786 9778 format %{ "XOR $dst.lo,$src.lo\n\t"
9787 9779 "XOR $dst.hi,$src.hi" %}
9788 9780 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9789 9781 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9790 9782 ins_pipe( ialu_reg_long );
9791 9783 %}
9792 9784
9793 9785 // Xor Long Register with Memory
9794 9786 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9795 9787 match(Set dst (XorL dst (LoadL mem)));
9796 9788 effect(KILL cr);
9797 9789 ins_cost(125);
9798 9790 format %{ "XOR $dst.lo,$mem\n\t"
9799 9791 "XOR $dst.hi,$mem+4" %}
9800 9792 opcode(0x33,0x33);
9801 9793 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9802 9794 ins_pipe( ialu_reg_long_mem );
9803 9795 %}
9804 9796
// Shift Left Long by 1
// With UseNewLongLShift, a 64-bit left shift by one constant bit is
// expanded as an ADD/ADC pair: dst.lo doubles itself and the carry is
// propagated into dst.hi.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Same ADD/ADC doubling applied twice.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Same ADD/ADC doubling applied three times.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9861 9853
// Shift Left Long by 1-31
// SHLD funnels the top bits of $dst.lo into $dst.hi, then the low word is
// shifted; a single 64-bit shift without a temp register.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Shifting by >= 32: the low word moves wholesale into the high word,
// is shifted by the remaining $cnt-32 bits, and the low word is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime count in CL: tests bit 5 of the count (shift >= 32) and falls
// into the SHLD/SHL pair, pre-moving lo->hi for the big-shift case.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9902 9894
// Shift Right Long by 1-31
// Logical (unsigned) right shift: SHRD funnels low bits of $dst.hi into
// $dst.lo, then the high word is shifted with zero fill.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Shift >= 32: high word moves into the low word, shifts by $cnt-32, and
// the high word is zeroed (logical shift).
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
// Runtime count in CL; same big/small split as salL_eReg_CL but shifting
// right with zero fill.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9943 9935
// Shift Right Long by 1-31
// Arithmetic (signed) right shift: like shrL_eReg_1_31 but the high word
// uses SAR so the sign bit is replicated.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Shift >= 32: high word moves into the low word and is shifted by
// $cnt-32; "SAR $dst.hi,31" smears the sign across the whole high word.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Runtime count in CL; big-shift path sign-fills the high word instead of
// zeroing it (contrast shrL_eReg_CL).
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9984 9976

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP writes EFLAGS directly; cmpF_P6_fixup converts the unordered
// (NaN) result into a consistent flag state via AH/SAHF.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// CF-only consumer variant: no NaN fixup needed, so EAX is not clobbered.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: FPU status word is pulled through AX (FNSTSW/SAHF), which
// is why EAX is killed; unordered results are treated as "less than".
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10069 10061
// float compare and set condition codes in EFLAGS by XMM regs
// COMISD sets PF on an unordered compare; the cmpF_P6_fixup tail folds the
// NaN case into CF (via AH/SAHF), which is why EAX is clobbered here.
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF-only consumer variant: raw COMISD result is enough, no NaN fixup.
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpXD_cc.
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form of cmpXD_ccCF.
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// Three-way compare (CmpD3): NaN and "less than" both yield -1 via the
// shared nan:/inc: tail emitted by CmpX_Result.
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISD $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// As cmpXD_reg, but $dst is zeroed with MOV after the compare so the
// freshly-set flags are not destroyed (XOR would clobber them).
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10165 10157

// x87 double subtract: push $src, then reverse-subtract-and-pop into $dst.
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit round-to-memory (RoundDouble), storing the
// result to a stack slot to force 64-bit rounding of the 80-bit x87 value.
instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Cisc variant: subtrahend loaded straight from memory.
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10207 10199
// x87 absolute value: operates in place on the FPU top-of-stack (regDPR1),
// so no operand encoding is needed beyond the FABS opcode.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 absolute value: clear the sign bit by ANDing with a constant mask.
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// x87 negate: FCHS flips the sign of the FPU top-of-stack in place.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 negate: flip the sign bit by XORing with the signflip constant pool.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister,
             ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}
10246 10238
// x87 double add: push $src, add-and-pop into $dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with explicit round-to-memory: stores to a stack slot to squeeze the
// 80-bit x87 intermediate down to a properly rounded 64-bit double.
instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Cisc variant: addend loaded straight from memory.
instruct addD_reg_mem(regD dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write form: load $dst, add $src, store back to the same
// memory location.
instruct addD_mem_reg(memory dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
10305 10297
// Add the constant 1.0: FLD1 materializes it directly, no memory load.
instruct addD_reg_imm1(regD dst, immD1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant, loaded from the nmethod's constant table
// (rev 1839); the predicate excludes 0.0/1.0, which have cheaper forms.
instruct addD_reg_imm(regD dst, immD con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with explicit D-round to a stack slot.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10342 10338
// Add two double precision floating point values in xmm
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a double constant, read from the nmethod's constant table (rev 1839).
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of ADDSD.
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10367 10365
// Sub two double precision floating point values in xmm
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a double constant, read from the nmethod's constant table.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of SUBSD.
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10392 10392
// Mul two double precision floating point values in xmm
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a double constant, read from the nmethod's constant table.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of MULSD.
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10417 10419
// Div two double precision floating point values in xmm
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a double constant, read from the nmethod's constant table.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form of DIVSD.
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10443 10447
10444 10448
// x87 double multiply: push $src, multiply-and-pop into $dst.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// ins_cost(1) deliberately undercuts every other MulD rule so this is the
// only multiply selected inside strictfp methods.
instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10483 10487
// Multiply by a general double constant, loaded from the nmethod's constant
// table (rev 1839); 0.0/1.0 are excluded by the predicate (cheaper forms
// exist for those).
instruct mulD_reg_imm(regD dst, immD con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Cisc variant: multiplier loaded straight from memory.
instruct mulD_reg_mem(regD dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_F(src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10524 10529
10525 10530
// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
// Fused multiply-then-reverse-subtract; result likewise lands in src2.
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
10558 10563
10559 10564
// x87 double divide: push $src, divide-and-pop into $dst.
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10572 10577
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX(review): the original clause carried two predicate() statements
  // (a bare "UseSSE<=1" followed by the strict-method test) and an odd
  // "ins_cost(01)".  Only one predicate is meaningful per instruct, so the
  // two are merged into the single strict test, mirroring the sibling
  // strictfp_mulD_reg above; the cost is written as a plain 1.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10600 10605
10601 10606 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10602 10607 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10603 10608 match(Set dst (RoundDouble (DivD src1 src2)));
10604 10609
10605 10610 format %{ "FLD $src1\n\t"
10606 10611 "FDIV ST,$src2\n\t"
10607 10612 "FSTP_D $dst\t# D-round" %}
10608 10613 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10609 10614 ins_encode( Push_Reg_D(src1),
10610 10615 OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10611 10616 ins_pipe( fpu_mem_reg_reg );
10612 10617 %}
10613 10618
10614 10619
10615 10620 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
10616 10621 predicate(UseSSE<=1);
10617 10622 match(Set dst (ModD dst src));
10618 10623 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10619 10624
10620 10625 format %{ "DMOD $dst,$src" %}
10621 10626 ins_cost(250);
10622 10627 ins_encode(Push_Reg_Mod_D(dst, src),
10623 10628 emitModD(),
10624 10629 Push_Result_Mod_D(src),
10625 10630 Pop_Reg_D(dst));
10626 10631 ins_pipe( pipe_slow );
10627 10632 %}
10628 10633
10629 10634 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
10630 10635 predicate(UseSSE>=2);
10631 10636 match(Set dst (ModD src0 src1));
10632 10637 effect(KILL rax, KILL cr);
10633 10638
10634 10639 format %{ "SUB ESP,8\t # DMOD\n"
10635 10640 "\tMOVSD [ESP+0],$src1\n"
10636 10641 "\tFLD_D [ESP+0]\n"
10637 10642 "\tMOVSD [ESP+0],$src0\n"
10638 10643 "\tFLD_D [ESP+0]\n"
10639 10644 "loop:\tFPREM\n"
10640 10645 "\tFWAIT\n"
10641 10646 "\tFNSTSW AX\n"
10642 10647 "\tSAHF\n"
10643 10648 "\tJP loop\n"
10644 10649 "\tFSTP_D [ESP+0]\n"
10645 10650 "\tMOVSD $dst,[ESP+0]\n"
10646 10651 "\tADD ESP,8\n"
10647 10652 "\tFSTP ST0\t # Restore FPU Stack"
10648 10653 %}
10649 10654 ins_cost(250);
10650 10655 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10651 10656 ins_pipe( pipe_slow );
10652 10657 %}
10653 10658
10654 10659 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
10655 10660 predicate (UseSSE<=1);
10656 10661 match(Set dst (SinD src));
10657 10662 ins_cost(1800);
10658 10663 format %{ "DSIN $dst" %}
10659 10664 opcode(0xD9, 0xFE);
10660 10665 ins_encode( OpcP, OpcS );
10661 10666 ins_pipe( pipe_slow );
10662 10667 %}
10663 10668
10664 10669 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
10665 10670 predicate (UseSSE>=2);
10666 10671 match(Set dst (SinD dst));
10667 10672 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10668 10673 ins_cost(1800);
10669 10674 format %{ "DSIN $dst" %}
10670 10675 opcode(0xD9, 0xFE);
10671 10676 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10672 10677 ins_pipe( pipe_slow );
10673 10678 %}
10674 10679
10675 10680 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
10676 10681 predicate (UseSSE<=1);
10677 10682 match(Set dst (CosD src));
10678 10683 ins_cost(1800);
10679 10684 format %{ "DCOS $dst" %}
10680 10685 opcode(0xD9, 0xFF);
10681 10686 ins_encode( OpcP, OpcS );
10682 10687 ins_pipe( pipe_slow );
10683 10688 %}
10684 10689
10685 10690 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
10686 10691 predicate (UseSSE>=2);
10687 10692 match(Set dst (CosD dst));
10688 10693 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10689 10694 ins_cost(1800);
10690 10695 format %{ "DCOS $dst" %}
10691 10696 opcode(0xD9, 0xFF);
10692 10697 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10693 10698 ins_pipe( pipe_slow );
10694 10699 %}
10695 10700
10696 10701 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
10697 10702 predicate (UseSSE<=1);
10698 10703 match(Set dst(TanD src));
10699 10704 format %{ "DTAN $dst" %}
10700 10705 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10701 10706 Opcode(0xDD), Opcode(0xD8)); // fstp st
10702 10707 ins_pipe( pipe_slow );
10703 10708 %}
10704 10709
10705 10710 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
10706 10711 predicate (UseSSE>=2);
10707 10712 match(Set dst(TanD dst));
10708 10713 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10709 10714 format %{ "DTAN $dst" %}
10710 10715 ins_encode( Push_SrcXD(dst),
10711 10716 Opcode(0xD9), Opcode(0xF2), // fptan
10712 10717 Opcode(0xDD), Opcode(0xD8), // fstp st
10713 10718 Push_ResultXD(dst) );
10714 10719 ins_pipe( pipe_slow );
10715 10720 %}
10716 10721
10717 10722 instruct atanD_reg(regD dst, regD src) %{
10718 10723 predicate (UseSSE<=1);
10719 10724 match(Set dst(AtanD dst src));
10720 10725 format %{ "DATA $dst,$src" %}
10721 10726 opcode(0xD9, 0xF3);
10722 10727 ins_encode( Push_Reg_D(src),
10723 10728 OpcP, OpcS, RegOpc(dst) );
10724 10729 ins_pipe( pipe_slow );
10725 10730 %}
10726 10731
10727 10732 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10728 10733 predicate (UseSSE>=2);
10729 10734 match(Set dst(AtanD dst src));
10730 10735 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10731 10736 format %{ "DATA $dst,$src" %}
10732 10737 opcode(0xD9, 0xF3);
10733 10738 ins_encode( Push_SrcXD(src),
10734 10739 OpcP, OpcS, Push_ResultXD(dst) );
10735 10740 ins_pipe( pipe_slow );
10736 10741 %}
10737 10742
10738 10743 instruct sqrtD_reg(regD dst, regD src) %{
10739 10744 predicate (UseSSE<=1);
10740 10745 match(Set dst (SqrtD src));
10741 10746 format %{ "DSQRT $dst,$src" %}
10742 10747 opcode(0xFA, 0xD9);
10743 10748 ins_encode( Push_Reg_D(src),
10744 10749 OpcS, OpcP, Pop_Reg_D(dst) );
10745 10750 ins_pipe( pipe_slow );
10746 10751 %}
10747 10752
10748 10753 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10749 10754 predicate (UseSSE<=1);
10750 10755 match(Set Y (PowD X Y)); // Raise X to the Yth power
10751 10756 effect(KILL rax, KILL rbx, KILL rcx);
10752 10757 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10753 10758 "FLD_D $X\n\t"
10754 10759 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10755 10760
10756 10761 "FDUP \t\t\t# Q Q\n\t"
10757 10762 "FRNDINT\t\t\t# int(Q) Q\n\t"
10758 10763 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10759 10764 "FISTP dword [ESP]\n\t"
10760 10765 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10761 10766 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10762 10767 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10763 10768 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10764 10769 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10765 10770 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10766 10771 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10767 10772 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10768 10773 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10769 10774 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10770 10775 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10771 10776 "MOV [ESP+0],0\n\t"
10772 10777 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10773 10778
10774 10779 "ADD ESP,8"
10775 10780 %}
10776 10781 ins_encode( push_stack_temp_qword,
10777 10782 Push_Reg_D(X),
10778 10783 Opcode(0xD9), Opcode(0xF1), // fyl2x
10779 10784 pow_exp_core_encoding,
10780 10785 pop_stack_temp_qword);
10781 10786 ins_pipe( pipe_slow );
10782 10787 %}
10783 10788
10784 10789 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10785 10790 predicate (UseSSE>=2);
10786 10791 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10787 10792 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10788 10793 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10789 10794 "MOVSD [ESP],$src1\n\t"
10790 10795 "FLD FPR1,$src1\n\t"
10791 10796 "MOVSD [ESP],$src0\n\t"
10792 10797 "FLD FPR1,$src0\n\t"
10793 10798 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t"
10794 10799
10795 10800 "FDUP \t\t\t# Q Q\n\t"
10796 10801 "FRNDINT\t\t\t# int(Q) Q\n\t"
10797 10802 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10798 10803 "FISTP dword [ESP]\n\t"
10799 10804 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10800 10805 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10801 10806 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10802 10807 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10803 10808 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10804 10809 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10805 10810 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10806 10811 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10807 10812 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10808 10813 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10809 10814 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10810 10815 "MOV [ESP+0],0\n\t"
10811 10816 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10812 10817
10813 10818 "FST_D [ESP]\n\t"
10814 10819 "MOVSD $dst,[ESP]\n\t"
10815 10820 "ADD ESP,8"
10816 10821 %}
10817 10822 ins_encode( push_stack_temp_qword,
10818 10823 push_xmm_to_fpr1(src1),
10819 10824 push_xmm_to_fpr1(src0),
10820 10825 Opcode(0xD9), Opcode(0xF1), // fyl2x
10821 10826 pow_exp_core_encoding,
10822 10827 Push_ResultXD(dst) );
10823 10828 ins_pipe( pipe_slow );
10824 10829 %}
10825 10830
10826 10831
10827 10832 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10828 10833 predicate (UseSSE<=1);
10829 10834 match(Set dpr1 (ExpD dpr1));
10830 10835 effect(KILL rax, KILL rbx, KILL rcx);
10831 10836 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding"
10832 10837 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10833 10838 "FMULP \t\t\t# Q=X*log2(e)\n\t"
10834 10839
10835 10840 "FDUP \t\t\t# Q Q\n\t"
10836 10841 "FRNDINT\t\t\t# int(Q) Q\n\t"
10837 10842 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10838 10843 "FISTP dword [ESP]\n\t"
10839 10844 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10840 10845 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10841 10846 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10842 10847 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10843 10848 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10844 10849 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10845 10850 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10846 10851 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10847 10852 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10848 10853 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10849 10854 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10850 10855 "MOV [ESP+0],0\n\t"
10851 10856 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10852 10857
10853 10858 "ADD ESP,8"
10854 10859 %}
10855 10860 ins_encode( push_stack_temp_qword,
10856 10861 Opcode(0xD9), Opcode(0xEA), // fldl2e
10857 10862 Opcode(0xDE), Opcode(0xC9), // fmulp
10858 10863 pow_exp_core_encoding,
10859 10864 pop_stack_temp_qword);
10860 10865 ins_pipe( pipe_slow );
10861 10866 %}
10862 10867
10863 10868 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10864 10869 predicate (UseSSE>=2);
10865 10870 match(Set dst (ExpD src));
10866 10871 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10867 10872 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10868 10873 "MOVSD [ESP],$src\n\t"
10869 10874 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10870 10875 "FMULP \t\t\t# Q=X*log2(e) X\n\t"
10871 10876
10872 10877 "FDUP \t\t\t# Q Q\n\t"
10873 10878 "FRNDINT\t\t\t# int(Q) Q\n\t"
10874 10879 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10875 10880 "FISTP dword [ESP]\n\t"
10876 10881 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10877 10882 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10878 10883 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10879 10884 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t"
10880 10885 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t"
10881 10886 "ADD EAX,1023\t\t# Double exponent bias\n\t"
10882 10887 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t"
10883 10888 "SHL EAX,20\t\t# Shift exponent into place\n\t"
10884 10889 "TEST EBX,ECX\t\t# Check for overflow\n\t"
10885 10890 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10886 10891 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10887 10892 "MOV [ESP+0],0\n\t"
10888 10893 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10889 10894
10890 10895 "FST_D [ESP]\n\t"
10891 10896 "MOVSD $dst,[ESP]\n\t"
10892 10897 "ADD ESP,8"
10893 10898 %}
10894 10899 ins_encode( Push_SrcXD(src),
10895 10900 Opcode(0xD9), Opcode(0xEA), // fldl2e
10896 10901 Opcode(0xDE), Opcode(0xC9), // fmulp
10897 10902 pow_exp_core_encoding,
10898 10903 Push_ResultXD(dst) );
10899 10904 ins_pipe( pipe_slow );
10900 10905 %}
10901 10906
10902 10907
10903 10908
10904 10909 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
10905 10910 predicate (UseSSE<=1);
10906 10911 // The source Double operand on FPU stack
10907 10912 match(Set dst (Log10D src));
10908 10913 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10909 10914 // fxch ; swap ST(0) with ST(1)
10910 10915 // fyl2x ; compute log_10(2) * log_2(x)
10911 10916 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10912 10917 "FXCH \n\t"
10913 10918 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10914 10919 %}
10915 10920 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10916 10921 Opcode(0xD9), Opcode(0xC9), // fxch
10917 10922 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10918 10923
10919 10924 ins_pipe( pipe_slow );
10920 10925 %}
10921 10926
10922 10927 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10923 10928 predicate (UseSSE>=2);
10924 10929 effect(KILL cr);
10925 10930 match(Set dst (Log10D src));
10926 10931 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10927 10932 // fyl2x ; compute log_10(2) * log_2(x)
10928 10933 format %{ "FLDLG2 \t\t\t#Log10\n\t"
10929 10934 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
10930 10935 %}
10931 10936 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
10932 10937 Push_SrcXD(src),
10933 10938 Opcode(0xD9), Opcode(0xF1), // fyl2x
10934 10939 Push_ResultXD(dst));
10935 10940
10936 10941 ins_pipe( pipe_slow );
10937 10942 %}
10938 10943
10939 10944 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
10940 10945 predicate (UseSSE<=1);
10941 10946 // The source Double operand on FPU stack
10942 10947 match(Set dst (LogD src));
10943 10948 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10944 10949 // fxch ; swap ST(0) with ST(1)
10945 10950 // fyl2x ; compute log_e(2) * log_2(x)
10946 10951 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10947 10952 "FXCH \n\t"
10948 10953 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10949 10954 %}
10950 10955 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10951 10956 Opcode(0xD9), Opcode(0xC9), // fxch
10952 10957 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10953 10958
10954 10959 ins_pipe( pipe_slow );
10955 10960 %}
10956 10961
10957 10962 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
10958 10963 predicate (UseSSE>=2);
10959 10964 effect(KILL cr);
10960 10965 // The source and result Double operands in XMM registers
10961 10966 match(Set dst (LogD src));
10962 10967 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
10963 10968 // fyl2x ; compute log_e(2) * log_2(x)
10964 10969 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10965 10970 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
10966 10971 %}
10967 10972 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
10968 10973 Push_SrcXD(src),
10969 10974 Opcode(0xD9), Opcode(0xF1), // fyl2x
10970 10975 Push_ResultXD(dst));
10971 10976 ins_pipe( pipe_slow );
10972 10977 %}
10973 10978
10974 10979 //-------------Float Instructions-------------------------------
10975 10980 // Float Math
10976 10981
10977 10982 // Code for float compare:
10978 10983 // fcompp();
10979 10984 // fwait(); fnstsw_ax();
10980 10985 // sahf();
10981 10986 // movl(dst, unordered_result);
10982 10987 // jcc(Assembler::parity, exit);
10983 10988 // movl(dst, less_result);
10984 10989 // jcc(Assembler::below, exit);
10985 10990 // movl(dst, equal_result);
10986 10991 // jcc(Assembler::equal, exit);
10987 10992 // movl(dst, greater_result);
10988 10993 // exit:
10989 10994
10990 10995 // P6 version of float compare, sets condition codes in EFLAGS
10991 10996 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
10992 10997 predicate(VM_Version::supports_cmov() && UseSSE == 0);
10993 10998 match(Set cr (CmpF src1 src2));
10994 10999 effect(KILL rax);
10995 11000 ins_cost(150);
10996 11001 format %{ "FLD $src1\n\t"
10997 11002 "FUCOMIP ST,$src2 // P6 instruction\n\t"
10998 11003 "JNP exit\n\t"
10999 11004 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
11000 11005 "SAHF\n"
11001 11006 "exit:\tNOP // avoid branch to branch" %}
11002 11007 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11003 11008 ins_encode( Push_Reg_D(src1),
11004 11009 OpcP, RegOpc(src2),
11005 11010 cmpF_P6_fixup );
11006 11011 ins_pipe( pipe_slow );
11007 11012 %}
11008 11013
11009 11014 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
11010 11015 predicate(VM_Version::supports_cmov() && UseSSE == 0);
11011 11016 match(Set cr (CmpF src1 src2));
11012 11017 ins_cost(100);
11013 11018 format %{ "FLD $src1\n\t"
11014 11019 "FUCOMIP ST,$src2 // P6 instruction" %}
11015 11020 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11016 11021 ins_encode( Push_Reg_D(src1),
11017 11022 OpcP, RegOpc(src2));
11018 11023 ins_pipe( pipe_slow );
11019 11024 %}
11020 11025
11021 11026
11022 11027 // Compare & branch
11023 11028 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
11024 11029 predicate(UseSSE == 0);
11025 11030 match(Set cr (CmpF src1 src2));
11026 11031 effect(KILL rax);
11027 11032 ins_cost(200);
11028 11033 format %{ "FLD $src1\n\t"
11029 11034 "FCOMp $src2\n\t"
11030 11035 "FNSTSW AX\n\t"
11031 11036 "TEST AX,0x400\n\t"
11032 11037 "JZ,s flags\n\t"
11033 11038 "MOV AH,1\t# unordered treat as LT\n"
11034 11039 "flags:\tSAHF" %}
11035 11040 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11036 11041 ins_encode( Push_Reg_D(src1),
11037 11042 OpcP, RegOpc(src2),
11038 11043 fpu_flags);
11039 11044 ins_pipe( pipe_slow );
11040 11045 %}
11041 11046
11042 11047 // Compare vs zero into -1,0,1
11043 11048 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
11044 11049 predicate(UseSSE == 0);
11045 11050 match(Set dst (CmpF3 src1 zero));
11046 11051 effect(KILL cr, KILL rax);
11047 11052 ins_cost(280);
11048 11053 format %{ "FTSTF $dst,$src1" %}
11049 11054 opcode(0xE4, 0xD9);
11050 11055 ins_encode( Push_Reg_D(src1),
11051 11056 OpcS, OpcP, PopFPU,
11052 11057 CmpF_Result(dst));
11053 11058 ins_pipe( pipe_slow );
11054 11059 %}
11055 11060
11056 11061 // Compare into -1,0,1
11057 11062 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11058 11063 predicate(UseSSE == 0);
11059 11064 match(Set dst (CmpF3 src1 src2));
11060 11065 effect(KILL cr, KILL rax);
11061 11066 ins_cost(300);
11062 11067 format %{ "FCMPF $dst,$src1,$src2" %}
11063 11068 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11064 11069 ins_encode( Push_Reg_D(src1),
11065 11070 OpcP, RegOpc(src2),
11066 11071 CmpF_Result(dst));
11067 11072 ins_pipe( pipe_slow );
11068 11073 %}
11069 11074
11070 11075 // float compare and set condition codes in EFLAGS by XMM regs
11071 11076 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
11072 11077 predicate(UseSSE>=1);
11073 11078 match(Set cr (CmpF dst src));
11074 11079 effect(KILL rax);
11075 11080 ins_cost(145);
11076 11081 format %{ "COMISS $dst,$src\n"
11077 11082 "\tJNP exit\n"
11078 11083 "\tMOV ah,1 // saw a NaN, set CF\n"
11079 11084 "\tSAHF\n"
11080 11085 "exit:\tNOP // avoid branch to branch" %}
11081 11086 opcode(0x0F, 0x2F);
11082 11087 ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
11083 11088 ins_pipe( pipe_slow );
11084 11089 %}
11085 11090
11086 11091 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
11087 11092 predicate(UseSSE>=1);
11088 11093 match(Set cr (CmpF dst src));
11089 11094 ins_cost(100);
11090 11095 format %{ "COMISS $dst,$src" %}
11091 11096 opcode(0x0F, 0x2F);
11092 11097 ins_encode(OpcP, OpcS, RegReg(dst, src));
11093 11098 ins_pipe( pipe_slow );
11094 11099 %}
11095 11100
11096 11101 // float compare and set condition codes in EFLAGS by XMM regs
11097 11102 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
11098 11103 predicate(UseSSE>=1);
11099 11104 match(Set cr (CmpF dst (LoadF src)));
11100 11105 effect(KILL rax);
11101 11106 ins_cost(165);
11102 11107 format %{ "COMISS $dst,$src\n"
11103 11108 "\tJNP exit\n"
11104 11109 "\tMOV ah,1 // saw a NaN, set CF\n"
11105 11110 "\tSAHF\n"
11106 11111 "exit:\tNOP // avoid branch to branch" %}
11107 11112 opcode(0x0F, 0x2F);
11108 11113 ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
11109 11114 ins_pipe( pipe_slow );
11110 11115 %}
11111 11116
11112 11117 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
11113 11118 predicate(UseSSE>=1);
11114 11119 match(Set cr (CmpF dst (LoadF src)));
11115 11120 ins_cost(100);
11116 11121 format %{ "COMISS $dst,$src" %}
11117 11122 opcode(0x0F, 0x2F);
11118 11123 ins_encode(OpcP, OpcS, RegMem(dst, src));
11119 11124 ins_pipe( pipe_slow );
11120 11125 %}
11121 11126
11122 11127 // Compare into -1,0,1 in XMM
11123 11128 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
11124 11129 predicate(UseSSE>=1);
11125 11130 match(Set dst (CmpF3 src1 src2));
11126 11131 effect(KILL cr);
11127 11132 ins_cost(255);
11128 11133 format %{ "XOR $dst,$dst\n"
11129 11134 "\tCOMISS $src1,$src2\n"
11130 11135 "\tJP,s nan\n"
11131 11136 "\tJEQ,s exit\n"
11132 11137 "\tJA,s inc\n"
11133 11138 "nan:\tDEC $dst\n"
11134 11139 "\tJMP,s exit\n"
11135 11140 "inc:\tINC $dst\n"
11136 11141 "exit:"
11137 11142 %}
11138 11143 opcode(0x0F, 0x2F);
11139 11144 ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
11140 11145 ins_pipe( pipe_slow );
11141 11146 %}
11142 11147
11143 11148 // Compare into -1,0,1 in XMM and memory
11144 11149 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
11145 11150 predicate(UseSSE>=1);
11146 11151 match(Set dst (CmpF3 src1 (LoadF mem)));
11147 11152 effect(KILL cr);
11148 11153 ins_cost(275);
11149 11154 format %{ "COMISS $src1,$mem\n"
11150 11155 "\tMOV $dst,0\t\t# do not blow flags\n"
11151 11156 "\tJP,s nan\n"
11152 11157 "\tJEQ,s exit\n"
11153 11158 "\tJA,s inc\n"
11154 11159 "nan:\tDEC $dst\n"
11155 11160 "\tJMP,s exit\n"
11156 11161 "inc:\tINC $dst\n"
11157 11162 "exit:"
11158 11163 %}
11159 11164 opcode(0x0F, 0x2F);
11160 11165 ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
11161 11166 ins_pipe( pipe_slow );
11162 11167 %}
11163 11168
11164 11169 // Spill to obtain 24-bit precision
11165 11170 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
11166 11171 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11167 11172 match(Set dst (SubF src1 src2));
11168 11173
11169 11174 format %{ "FSUB $dst,$src1 - $src2" %}
11170 11175 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
11171 11176 ins_encode( Push_Reg_F(src1),
11172 11177 OpcReg_F(src2),
11173 11178 Pop_Mem_F(dst) );
11174 11179 ins_pipe( fpu_mem_reg_reg );
11175 11180 %}
11176 11181 //
11177 11182 // This instruction does not round to 24-bits
11178 11183 instruct subF_reg(regF dst, regF src) %{
11179 11184 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11180 11185 match(Set dst (SubF dst src));
11181 11186
11182 11187 format %{ "FSUB $dst,$src" %}
11183 11188 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
11184 11189 ins_encode( Push_Reg_F(src),
11185 11190 OpcP, RegOpc(dst) );
11186 11191 ins_pipe( fpu_reg_reg );
11187 11192 %}
11188 11193
11189 11194 // Spill to obtain 24-bit precision
11190 11195 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
11191 11196 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11192 11197 match(Set dst (AddF src1 src2));
11193 11198
11194 11199 format %{ "FADD $dst,$src1,$src2" %}
11195 11200 opcode(0xD8, 0x0); /* D8 C0+i */
11196 11201 ins_encode( Push_Reg_F(src2),
11197 11202 OpcReg_F(src1),
11198 11203 Pop_Mem_F(dst) );
11199 11204 ins_pipe( fpu_mem_reg_reg );
11200 11205 %}
11201 11206 //
11202 11207 // This instruction does not round to 24-bits
11203 11208 instruct addF_reg(regF dst, regF src) %{
11204 11209 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11205 11210 match(Set dst (AddF dst src));
11206 11211
11207 11212 format %{ "FLD $src\n\t"
11208 11213 "FADDp $dst,ST" %}
11209 11214 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
11210 11215 ins_encode( Push_Reg_F(src),
11211 11216 OpcP, RegOpc(dst) );
11212 11217 ins_pipe( fpu_reg_reg );
11213 11218 %}
11214 11219
11215 11220 // Add two single precision floating point values in xmm
11216 11221 instruct addX_reg(regX dst, regX src) %{
↓ open down ↓ |
713 lines elided |
↑ open up ↑ |
11217 11222 predicate(UseSSE>=1);
11218 11223 match(Set dst (AddF dst src));
11219 11224 format %{ "ADDSS $dst,$src" %}
11220 11225 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
11221 11226 ins_pipe( pipe_slow );
11222 11227 %}
11223 11228
11224 11229 instruct addX_imm(regX dst, immXF con) %{
11225 11230 predicate(UseSSE>=1);
11226 11231 match(Set dst (AddF dst con));
11227 - format %{ "ADDSS $dst,[$con]" %}
11228 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
11229 - ins_pipe( pipe_slow );
11232 + format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11233 + ins_encode %{
11234 + __ addss($dst$$XMMRegister, $constantaddress($con));
11235 + %}
11236 + ins_pipe(pipe_slow);
11230 11237 %}
11231 11238
11232 11239 instruct addX_mem(regX dst, memory mem) %{
11233 11240 predicate(UseSSE>=1);
11234 11241 match(Set dst (AddF dst (LoadF mem)));
11235 11242 format %{ "ADDSS $dst,$mem" %}
11236 11243 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
11237 11244 ins_pipe( pipe_slow );
11238 11245 %}
11239 11246
11240 11247 // Subtract two single precision floating point values in xmm
11241 11248 instruct subX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11242 11249 predicate(UseSSE>=1);
11243 11250 match(Set dst (SubF dst src));
11244 11251 format %{ "SUBSS $dst,$src" %}
11245 11252 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
11246 11253 ins_pipe( pipe_slow );
11247 11254 %}
11248 11255
11249 11256 instruct subX_imm(regX dst, immXF con) %{
11250 11257 predicate(UseSSE>=1);
11251 11258 match(Set dst (SubF dst con));
11252 - format %{ "SUBSS $dst,[$con]" %}
11253 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
11254 - ins_pipe( pipe_slow );
11259 + format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11260 + ins_encode %{
11261 + __ subss($dst$$XMMRegister, $constantaddress($con));
11262 + %}
11263 + ins_pipe(pipe_slow);
11255 11264 %}
11256 11265
11257 11266 instruct subX_mem(regX dst, memory mem) %{
11258 11267 predicate(UseSSE>=1);
11259 11268 match(Set dst (SubF dst (LoadF mem)));
11260 11269 format %{ "SUBSS $dst,$mem" %}
11261 11270 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
11262 11271 ins_pipe( pipe_slow );
11263 11272 %}
11264 11273
11265 11274 // Multiply two single precision floating point values in xmm
11266 11275 instruct mulX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11267 11276 predicate(UseSSE>=1);
11268 11277 match(Set dst (MulF dst src));
11269 11278 format %{ "MULSS $dst,$src" %}
11270 11279 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
11271 11280 ins_pipe( pipe_slow );
11272 11281 %}
11273 11282
11274 11283 instruct mulX_imm(regX dst, immXF con) %{
11275 11284 predicate(UseSSE>=1);
11276 11285 match(Set dst (MulF dst con));
11277 - format %{ "MULSS $dst,[$con]" %}
11278 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
11279 - ins_pipe( pipe_slow );
11286 + format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11287 + ins_encode %{
11288 + __ mulss($dst$$XMMRegister, $constantaddress($con));
11289 + %}
11290 + ins_pipe(pipe_slow);
11280 11291 %}
11281 11292
11282 11293 instruct mulX_mem(regX dst, memory mem) %{
11283 11294 predicate(UseSSE>=1);
11284 11295 match(Set dst (MulF dst (LoadF mem)));
11285 11296 format %{ "MULSS $dst,$mem" %}
11286 11297 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
11287 11298 ins_pipe( pipe_slow );
11288 11299 %}
11289 11300
11290 11301 // Divide two single precision floating point values in xmm
11291 11302 instruct divX_reg(regX dst, regX src) %{
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
11292 11303 predicate(UseSSE>=1);
11293 11304 match(Set dst (DivF dst src));
11294 11305 format %{ "DIVSS $dst,$src" %}
11295 11306 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
11296 11307 ins_pipe( pipe_slow );
11297 11308 %}
11298 11309
11299 11310 instruct divX_imm(regX dst, immXF con) %{
11300 11311 predicate(UseSSE>=1);
11301 11312 match(Set dst (DivF dst con));
11302 - format %{ "DIVSS $dst,[$con]" %}
11303 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
11304 - ins_pipe( pipe_slow );
11313 + format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11314 + ins_encode %{
11315 + __ divss($dst$$XMMRegister, $constantaddress($con));
11316 + %}
11317 + ins_pipe(pipe_slow);
11305 11318 %}
11306 11319
11307 11320 instruct divX_mem(regX dst, memory mem) %{
11308 11321 predicate(UseSSE>=1);
11309 11322 match(Set dst (DivF dst (LoadF mem)));
11310 11323 format %{ "DIVSS $dst,$mem" %}
11311 11324 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
11312 11325 ins_pipe( pipe_slow );
11313 11326 %}
11314 11327
11315 11328 // Get the square root of a single precision floating point values in xmm
11316 11329 instruct sqrtX_reg(regX dst, regX src) %{
11317 11330 predicate(UseSSE>=1);
11318 11331 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11319 11332 format %{ "SQRTSS $dst,$src" %}
11320 11333 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11321 11334 ins_pipe( pipe_slow );
11322 11335 %}
11323 11336
11324 11337 instruct sqrtX_mem(regX dst, memory mem) %{
11325 11338 predicate(UseSSE>=1);
11326 11339 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
11327 11340 format %{ "SQRTSS $dst,$mem" %}
11328 11341 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11329 11342 ins_pipe( pipe_slow );
11330 11343 %}
11331 11344
11332 11345 // Get the square root of a double precision floating point values in xmm
11333 11346 instruct sqrtXD_reg(regXD dst, regXD src) %{
11334 11347 predicate(UseSSE>=2);
11335 11348 match(Set dst (SqrtD src));
11336 11349 format %{ "SQRTSD $dst,$src" %}
11337 11350 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11338 11351 ins_pipe( pipe_slow );
11339 11352 %}
11340 11353
11341 11354 instruct sqrtXD_mem(regXD dst, memory mem) %{
11342 11355 predicate(UseSSE>=2);
11343 11356 match(Set dst (SqrtD (LoadD mem)));
11344 11357 format %{ "SQRTSD $dst,$mem" %}
11345 11358 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11346 11359 ins_pipe( pipe_slow );
11347 11360 %}
11348 11361
11349 11362 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11350 11363 predicate(UseSSE==0);
11351 11364 match(Set dst (AbsF src));
11352 11365 ins_cost(100);
11353 11366 format %{ "FABS" %}
11354 11367 opcode(0xE1, 0xD9);
11355 11368 ins_encode( OpcS, OpcP );
11356 11369 ins_pipe( fpu_reg_reg );
11357 11370 %}
11358 11371
11359 11372 instruct absX_reg(regX dst ) %{
11360 11373 predicate(UseSSE>=1);
11361 11374 match(Set dst (AbsF dst));
11362 11375 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11363 11376 ins_encode( AbsXF_encoding(dst));
11364 11377 ins_pipe( pipe_slow );
11365 11378 %}
11366 11379
11367 11380 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11368 11381 predicate(UseSSE==0);
11369 11382 match(Set dst (NegF src));
11370 11383 ins_cost(100);
11371 11384 format %{ "FCHS" %}
11372 11385 opcode(0xE0, 0xD9);
11373 11386 ins_encode( OpcS, OpcP );
11374 11387 ins_pipe( fpu_reg_reg );
11375 11388 %}
11376 11389
11377 11390 instruct negX_reg( regX dst ) %{
11378 11391 predicate(UseSSE>=1);
11379 11392 match(Set dst (NegF dst));
11380 11393 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11381 11394 ins_encode( NegXF_encoding(dst));
11382 11395 ins_pipe( pipe_slow );
11383 11396 %}
11384 11397
11385 11398 // Cisc-alternate to addF_reg
11386 11399 // Spill to obtain 24-bit precision
11387 11400 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11388 11401 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11389 11402 match(Set dst (AddF src1 (LoadF src2)));
11390 11403
11391 11404 format %{ "FLD $src2\n\t"
11392 11405 "FADD ST,$src1\n\t"
11393 11406 "FSTP_S $dst" %}
11394 11407 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11395 11408 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11396 11409 OpcReg_F(src1),
11397 11410 Pop_Mem_F(dst) );
11398 11411 ins_pipe( fpu_mem_reg_mem );
11399 11412 %}
11400 11413 //
11401 11414 // Cisc-alternate to addF_reg
11402 11415 // This instruction does not round to 24-bits
11403 11416 instruct addF_reg_mem(regF dst, memory src) %{
11404 11417 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11405 11418 match(Set dst (AddF dst (LoadF src)));
11406 11419
11407 11420 format %{ "FADD $dst,$src" %}
11408 11421 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11409 11422 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11410 11423 OpcP, RegOpc(dst) );
11411 11424 ins_pipe( fpu_reg_mem );
11412 11425 %}
11413 11426
11414 11427 // // Following two instructions for _222_mpegaudio
11415 11428 // Spill to obtain 24-bit precision
11416 11429 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11417 11430 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11418 11431 match(Set dst (AddF src1 src2));
11419 11432
11420 11433 format %{ "FADD $dst,$src1,$src2" %}
11421 11434 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11422 11435 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11423 11436 OpcReg_F(src2),
11424 11437 Pop_Mem_F(dst) );
11425 11438 ins_pipe( fpu_mem_reg_mem );
11426 11439 %}
11427 11440
11428 11441 // Cisc-spill variant
11429 11442 // Spill to obtain 24-bit precision
11430 11443 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11431 11444 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11432 11445 match(Set dst (AddF src1 (LoadF src2)));
11433 11446
11434 11447 format %{ "FADD $dst,$src1,$src2 cisc" %}
11435 11448 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11436 11449 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11437 11450 set_instruction_start,
11438 11451 OpcP, RMopc_Mem(secondary,src1),
11439 11452 Pop_Mem_F(dst) );
11440 11453 ins_pipe( fpu_mem_mem_mem );
11441 11454 %}
11442 11455
11443 11456 // Spill to obtain 24-bit precision
11444 11457 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11445 11458 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11446 11459 match(Set dst (AddF src1 src2));
11447 11460
11448 11461 format %{ "FADD $dst,$src1,$src2" %}
↓ open down ↓ |
134 lines elided |
↑ open up ↑ |
11449 11462 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11450 11463 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11451 11464 set_instruction_start,
11452 11465 OpcP, RMopc_Mem(secondary,src1),
11453 11466 Pop_Mem_F(dst) );
11454 11467 ins_pipe( fpu_mem_mem_mem );
11455 11468 %}
11456 11469
11457 11470
11458 11471 // Spill to obtain 24-bit precision
11459 -instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11472 +instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11460 11473 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11461 - match(Set dst (AddF src1 src2));
11462 - format %{ "FLD $src1\n\t"
11463 - "FADD $src2\n\t"
11474 + match(Set dst (AddF src con));
11475 + format %{ "FLD $src\n\t"
11476 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11464 11477 "FSTP_S $dst" %}
11465 - opcode(0xD8, 0x00); /* D8 /0 */
11466 - ins_encode( Push_Reg_F(src1),
11467 - Opc_MemImm_F(src2),
11468 - Pop_Mem_F(dst));
11469 - ins_pipe( fpu_mem_reg_con );
11478 + ins_encode %{
11479 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11480 + __ fadd_s($constantaddress($con));
11481 + __ fstp_s(Address(rsp, $dst$$disp));
11482 + %}
11483 + ins_pipe(fpu_mem_reg_con);
11470 11484 %}
11471 11485 //
11472 11486 // This instruction does not round to 24-bits
11473 -instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11487 +instruct addF_reg_imm(regF dst, regF src, immF con) %{
11474 11488 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11475 - match(Set dst (AddF src1 src2));
11476 - format %{ "FLD $src1\n\t"
11477 - "FADD $src2\n\t"
11478 - "FSTP_S $dst" %}
11479 - opcode(0xD8, 0x00); /* D8 /0 */
11480 - ins_encode( Push_Reg_F(src1),
11481 - Opc_MemImm_F(src2),
11482 - Pop_Reg_F(dst));
11483 - ins_pipe( fpu_reg_reg_con );
11489 + match(Set dst (AddF src con));
11490 + format %{ "FLD $src\n\t"
11491 + "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11492 + "FSTP $dst" %}
11493 + ins_encode %{
11494 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11495 + __ fadd_s($constantaddress($con));
11496 + __ fstp_d($dst$$reg);
11497 + %}
11498 + ins_pipe(fpu_reg_reg_con);
11484 11499 %}
11485 11500
11486 11501 // Spill to obtain 24-bit precision
11487 11502 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11488 11503 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11489 11504 match(Set dst (MulF src1 src2));
11490 11505
11491 11506 format %{ "FLD $src1\n\t"
11492 11507 "FMUL $src2\n\t"
11493 11508 "FSTP_S $dst" %}
11494 11509 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11495 11510 ins_encode( Push_Reg_F(src1),
11496 11511 OpcReg_F(src2),
11497 11512 Pop_Mem_F(dst) );
11498 11513 ins_pipe( fpu_mem_reg_reg );
11499 11514 %}
11500 11515 //
11501 11516 // This instruction does not round to 24-bits
11502 11517 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11503 11518 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11504 11519 match(Set dst (MulF src1 src2));
11505 11520
11506 11521 format %{ "FLD $src1\n\t"
11507 11522 "FMUL $src2\n\t"
11508 11523 "FSTP_S $dst" %}
11509 11524 opcode(0xD8, 0x1); /* D8 C8+i */
11510 11525 ins_encode( Push_Reg_F(src2),
11511 11526 OpcReg_F(src1),
11512 11527 Pop_Reg_F(dst) );
11513 11528 ins_pipe( fpu_reg_reg_reg );
11514 11529 %}
11515 11530
11516 11531
11517 11532 // Spill to obtain 24-bit precision
11518 11533 // Cisc-alternate to reg-reg multiply
11519 11534 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11520 11535 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11521 11536 match(Set dst (MulF src1 (LoadF src2)));
11522 11537
11523 11538 format %{ "FLD_S $src2\n\t"
11524 11539 "FMUL $src1\n\t"
11525 11540 "FSTP_S $dst" %}
11526 11541 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11527 11542 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11528 11543 OpcReg_F(src1),
11529 11544 Pop_Mem_F(dst) );
11530 11545 ins_pipe( fpu_mem_reg_mem );
11531 11546 %}
11532 11547 //
11533 11548 // This instruction does not round to 24-bits
11534 11549 // Cisc-alternate to reg-reg multiply
11535 11550 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11536 11551 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11537 11552 match(Set dst (MulF src1 (LoadF src2)));
11538 11553
11539 11554 format %{ "FMUL $dst,$src1,$src2" %}
11540 11555 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11541 11556 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11542 11557 OpcReg_F(src1),
11543 11558 Pop_Reg_F(dst) );
11544 11559 ins_pipe( fpu_reg_reg_mem );
11545 11560 %}
11546 11561
11547 11562 // Spill to obtain 24-bit precision
11548 11563 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11549 11564 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11550 11565 match(Set dst (MulF src1 src2));
11551 11566
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
11552 11567 format %{ "FMUL $dst,$src1,$src2" %}
11553 11568 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11554 11569 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11555 11570 set_instruction_start,
11556 11571 OpcP, RMopc_Mem(secondary,src1),
11557 11572 Pop_Mem_F(dst) );
11558 11573 ins_pipe( fpu_mem_mem_mem );
11559 11574 %}
11560 11575
11561 11576 // Spill to obtain 24-bit precision
11562 -instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11577 +instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
11563 11578 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11564 - match(Set dst (MulF src1 src2));
11579 + match(Set dst (MulF src con));
11565 11580
11566 - format %{ "FMULc $dst,$src1,$src2" %}
11567 - opcode(0xD8, 0x1); /* D8 /1*/
11568 - ins_encode( Push_Reg_F(src1),
11569 - Opc_MemImm_F(src2),
11570 - Pop_Mem_F(dst));
11571 - ins_pipe( fpu_mem_reg_con );
11581 + format %{ "FLD $src\n\t"
11582 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11583 + "FSTP_S $dst" %}
11584 + ins_encode %{
11585 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11586 + __ fmul_s($constantaddress($con));
11587 + __ fstp_s(Address(rsp, $dst$$disp));
11588 + %}
11589 + ins_pipe(fpu_mem_reg_con);
11572 11590 %}
11573 11591 //
11574 11592 // This instruction does not round to 24-bits
11575 -instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11593 +instruct mulF_reg_imm(regF dst, regF src, immF con) %{
11576 11594 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11577 - match(Set dst (MulF src1 src2));
11595 + match(Set dst (MulF src con));
11578 11596
11579 - format %{ "FMULc $dst. $src1, $src2" %}
11580 - opcode(0xD8, 0x1); /* D8 /1*/
11581 - ins_encode( Push_Reg_F(src1),
11582 - Opc_MemImm_F(src2),
11583 - Pop_Reg_F(dst));
11584 - ins_pipe( fpu_reg_reg_con );
11597 + format %{ "FLD $src\n\t"
11598 + "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11599 + "FSTP $dst" %}
11600 + ins_encode %{
11601 + __ fld_s($src$$reg - 1); // FLD ST(i-1)
11602 + __ fmul_s($constantaddress($con));
11603 + __ fstp_d($dst$$reg);
11604 + %}
11605 + ins_pipe(fpu_reg_reg_con);
11585 11606 %}
11586 11607
11587 11608
11588 11609 //
11589 11610 // MACRO1 -- subsume unshared load into mulF
11590 11611 // This instruction does not round to 24-bits
11591 11612 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11592 11613 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11593 11614 match(Set dst (MulF (LoadF mem1) src));
11594 11615
11595 11616 format %{ "FLD $mem1 ===MACRO1===\n\t"
11596 11617 "FMUL ST,$src\n\t"
11597 11618 "FSTP $dst" %}
11598 11619 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11599 11620 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11600 11621 OpcReg_F(src),
11601 11622 Pop_Reg_F(dst) );
11602 11623 ins_pipe( fpu_reg_reg_mem );
11603 11624 %}
11604 11625 //
11605 11626 // MACRO2 -- addF a mulF which subsumed an unshared load
11606 11627 // This instruction does not round to 24-bits
11607 11628 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11608 11629 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11609 11630 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11610 11631 ins_cost(95);
11611 11632
11612 11633 format %{ "FLD $mem1 ===MACRO2===\n\t"
11613 11634 "FMUL ST,$src1 subsume mulF left load\n\t"
11614 11635 "FADD ST,$src2\n\t"
11615 11636 "FSTP $dst" %}
11616 11637 opcode(0xD9); /* LoadF D9 /0 */
11617 11638 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11618 11639 FMul_ST_reg(src1),
11619 11640 FAdd_ST_reg(src2),
11620 11641 Pop_Reg_F(dst) );
11621 11642 ins_pipe( fpu_reg_mem_reg_reg );
11622 11643 %}
11623 11644
11624 11645 // MACRO3 -- addF a mulF
11625 11646 // This instruction does not round to 24-bits. It is a '2-address'
11626 11647 // instruction in that the result goes back to src2. This eliminates
11627 11648 // a move from the macro; possibly the register allocator will have
11628 11649 // to add it back (and maybe not).
11629 11650 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11630 11651 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11631 11652 match(Set src2 (AddF (MulF src0 src1) src2));
11632 11653
11633 11654 format %{ "FLD $src0 ===MACRO3===\n\t"
11634 11655 "FMUL ST,$src1\n\t"
11635 11656 "FADDP $src2,ST" %}
11636 11657 opcode(0xD9); /* LoadF D9 /0 */
11637 11658 ins_encode( Push_Reg_F(src0),
11638 11659 FMul_ST_reg(src1),
11639 11660 FAddP_reg_ST(src2) );
11640 11661 ins_pipe( fpu_reg_reg_reg );
11641 11662 %}
11642 11663
11643 11664 // MACRO4 -- divF subF
11644 11665 // This instruction does not round to 24-bits
11645 11666 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11646 11667 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11647 11668 match(Set dst (DivF (SubF src2 src1) src3));
11648 11669
11649 11670 format %{ "FLD $src2 ===MACRO4===\n\t"
11650 11671 "FSUB ST,$src1\n\t"
11651 11672 "FDIV ST,$src3\n\t"
11652 11673 "FSTP $dst" %}
11653 11674 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11654 11675 ins_encode( Push_Reg_F(src2),
11655 11676 subF_divF_encode(src1,src3),
11656 11677 Pop_Reg_F(dst) );
11657 11678 ins_pipe( fpu_reg_reg_reg_reg );
11658 11679 %}
11659 11680
11660 11681 // Spill to obtain 24-bit precision
11661 11682 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11662 11683 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11663 11684 match(Set dst (DivF src1 src2));
11664 11685
11665 11686 format %{ "FDIV $dst,$src1,$src2" %}
11666 11687 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11667 11688 ins_encode( Push_Reg_F(src1),
11668 11689 OpcReg_F(src2),
11669 11690 Pop_Mem_F(dst) );
11670 11691 ins_pipe( fpu_mem_reg_reg );
11671 11692 %}
11672 11693 //
11673 11694 // This instruction does not round to 24-bits
11674 11695 instruct divF_reg(regF dst, regF src) %{
11675 11696 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11676 11697 match(Set dst (DivF dst src));
11677 11698
11678 11699 format %{ "FDIV $dst,$src" %}
11679 11700 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11680 11701 ins_encode( Push_Reg_F(src),
11681 11702 OpcP, RegOpc(dst) );
11682 11703 ins_pipe( fpu_reg_reg );
11683 11704 %}
11684 11705
11685 11706
11686 11707 // Spill to obtain 24-bit precision
11687 11708 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11688 11709 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11689 11710 match(Set dst (ModF src1 src2));
11690 11711 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11691 11712
11692 11713 format %{ "FMOD $dst,$src1,$src2" %}
11693 11714 ins_encode( Push_Reg_Mod_D(src1, src2),
11694 11715 emitModD(),
11695 11716 Push_Result_Mod_D(src2),
11696 11717 Pop_Mem_F(dst));
11697 11718 ins_pipe( pipe_slow );
11698 11719 %}
11699 11720 //
11700 11721 // This instruction does not round to 24-bits
11701 11722 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11702 11723 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11703 11724 match(Set dst (ModF dst src));
11704 11725 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11705 11726
11706 11727 format %{ "FMOD $dst,$src" %}
11707 11728 ins_encode(Push_Reg_Mod_D(dst, src),
11708 11729 emitModD(),
11709 11730 Push_Result_Mod_D(src),
11710 11731 Pop_Reg_F(dst));
11711 11732 ins_pipe( pipe_slow );
11712 11733 %}
11713 11734
11714 11735 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11715 11736 predicate(UseSSE>=1);
11716 11737 match(Set dst (ModF src0 src1));
11717 11738 effect(KILL rax, KILL cr);
11718 11739 format %{ "SUB ESP,4\t # FMOD\n"
11719 11740 "\tMOVSS [ESP+0],$src1\n"
11720 11741 "\tFLD_S [ESP+0]\n"
11721 11742 "\tMOVSS [ESP+0],$src0\n"
11722 11743 "\tFLD_S [ESP+0]\n"
11723 11744 "loop:\tFPREM\n"
11724 11745 "\tFWAIT\n"
11725 11746 "\tFNSTSW AX\n"
11726 11747 "\tSAHF\n"
11727 11748 "\tJP loop\n"
11728 11749 "\tFSTP_S [ESP+0]\n"
11729 11750 "\tMOVSS $dst,[ESP+0]\n"
11730 11751 "\tADD ESP,4\n"
11731 11752 "\tFSTP ST0\t # Restore FPU Stack"
11732 11753 %}
11733 11754 ins_cost(250);
11734 11755 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
11735 11756 ins_pipe( pipe_slow );
11736 11757 %}
11737 11758
11738 11759
11739 11760 //----------Arithmetic Conversion Instructions---------------------------------
11740 11761 // The conversions operations are all Alpha sorted. Please keep it that way!
11741 11762
11742 11763 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11743 11764 predicate(UseSSE==0);
11744 11765 match(Set dst (RoundFloat src));
11745 11766 ins_cost(125);
11746 11767 format %{ "FST_S $dst,$src\t# F-round" %}
11747 11768 ins_encode( Pop_Mem_Reg_F(dst, src) );
11748 11769 ins_pipe( fpu_mem_reg );
11749 11770 %}
11750 11771
11751 11772 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11752 11773 predicate(UseSSE<=1);
11753 11774 match(Set dst (RoundDouble src));
11754 11775 ins_cost(125);
11755 11776 format %{ "FST_D $dst,$src\t# D-round" %}
11756 11777 ins_encode( Pop_Mem_Reg_D(dst, src) );
11757 11778 ins_pipe( fpu_mem_reg );
11758 11779 %}
11759 11780
11760 11781 // Force rounding to 24-bit precision and 6-bit exponent
11761 11782 instruct convD2F_reg(stackSlotF dst, regD src) %{
11762 11783 predicate(UseSSE==0);
11763 11784 match(Set dst (ConvD2F src));
11764 11785 format %{ "FST_S $dst,$src\t# F-round" %}
11765 11786 expand %{
11766 11787 roundFloat_mem_reg(dst,src);
11767 11788 %}
11768 11789 %}
11769 11790
11770 11791 // Force rounding to 24-bit precision and 6-bit exponent
11771 11792 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11772 11793 predicate(UseSSE==1);
11773 11794 match(Set dst (ConvD2F src));
11774 11795 effect( KILL cr );
11775 11796 format %{ "SUB ESP,4\n\t"
11776 11797 "FST_S [ESP],$src\t# F-round\n\t"
11777 11798 "MOVSS $dst,[ESP]\n\t"
11778 11799 "ADD ESP,4" %}
11779 11800 ins_encode( D2X_encoding(dst, src) );
11780 11801 ins_pipe( pipe_slow );
11781 11802 %}
11782 11803
11783 11804 // Force rounding double precision to single precision
11784 11805 instruct convXD2X_reg(regX dst, regXD src) %{
11785 11806 predicate(UseSSE>=2);
11786 11807 match(Set dst (ConvD2F src));
11787 11808 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11788 11809 opcode(0xF2, 0x0F, 0x5A);
11789 11810 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11790 11811 ins_pipe( pipe_slow );
11791 11812 %}
11792 11813
11793 11814 instruct convF2D_reg_reg(regD dst, regF src) %{
11794 11815 predicate(UseSSE==0);
11795 11816 match(Set dst (ConvF2D src));
11796 11817 format %{ "FST_S $dst,$src\t# D-round" %}
11797 11818 ins_encode( Pop_Reg_Reg_D(dst, src));
11798 11819 ins_pipe( fpu_reg_reg );
11799 11820 %}
11800 11821
11801 11822 instruct convF2D_reg(stackSlotD dst, regF src) %{
11802 11823 predicate(UseSSE==1);
11803 11824 match(Set dst (ConvF2D src));
11804 11825 format %{ "FST_D $dst,$src\t# D-round" %}
11805 11826 expand %{
11806 11827 roundDouble_mem_reg(dst,src);
11807 11828 %}
11808 11829 %}
11809 11830
11810 11831 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11811 11832 predicate(UseSSE==1);
11812 11833 match(Set dst (ConvF2D src));
11813 11834 effect( KILL cr );
11814 11835 format %{ "SUB ESP,4\n\t"
11815 11836 "MOVSS [ESP] $src\n\t"
11816 11837 "FLD_S [ESP]\n\t"
11817 11838 "ADD ESP,4\n\t"
11818 11839 "FSTP $dst\t# D-round" %}
11819 11840 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
11820 11841 ins_pipe( pipe_slow );
11821 11842 %}
11822 11843
11823 11844 instruct convX2XD_reg(regXD dst, regX src) %{
11824 11845 predicate(UseSSE>=2);
11825 11846 match(Set dst (ConvF2D src));
11826 11847 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11827 11848 opcode(0xF3, 0x0F, 0x5A);
11828 11849 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11829 11850 ins_pipe( pipe_slow );
11830 11851 %}
11831 11852
11832 11853 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11833 11854 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11834 11855 predicate(UseSSE<=1);
11835 11856 match(Set dst (ConvD2I src));
11836 11857 effect( KILL tmp, KILL cr );
11837 11858 format %{ "FLD $src\t# Convert double to int \n\t"
11838 11859 "FLDCW trunc mode\n\t"
11839 11860 "SUB ESP,4\n\t"
11840 11861 "FISTp [ESP + #0]\n\t"
11841 11862 "FLDCW std/24-bit mode\n\t"
11842 11863 "POP EAX\n\t"
11843 11864 "CMP EAX,0x80000000\n\t"
11844 11865 "JNE,s fast\n\t"
11845 11866 "FLD_D $src\n\t"
11846 11867 "CALL d2i_wrapper\n"
11847 11868 "fast:" %}
11848 11869 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11849 11870 ins_pipe( pipe_slow );
11850 11871 %}
11851 11872
11852 11873 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11853 11874 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11854 11875 predicate(UseSSE>=2);
11855 11876 match(Set dst (ConvD2I src));
11856 11877 effect( KILL tmp, KILL cr );
11857 11878 format %{ "CVTTSD2SI $dst, $src\n\t"
11858 11879 "CMP $dst,0x80000000\n\t"
11859 11880 "JNE,s fast\n\t"
11860 11881 "SUB ESP, 8\n\t"
11861 11882 "MOVSD [ESP], $src\n\t"
11862 11883 "FLD_D [ESP]\n\t"
11863 11884 "ADD ESP, 8\n\t"
11864 11885 "CALL d2i_wrapper\n"
11865 11886 "fast:" %}
11866 11887 opcode(0x1); // double-precision conversion
11867 11888 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11868 11889 ins_pipe( pipe_slow );
11869 11890 %}
11870 11891
11871 11892 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11872 11893 predicate(UseSSE<=1);
11873 11894 match(Set dst (ConvD2L src));
11874 11895 effect( KILL cr );
11875 11896 format %{ "FLD $src\t# Convert double to long\n\t"
11876 11897 "FLDCW trunc mode\n\t"
11877 11898 "SUB ESP,8\n\t"
11878 11899 "FISTp [ESP + #0]\n\t"
11879 11900 "FLDCW std/24-bit mode\n\t"
11880 11901 "POP EAX\n\t"
11881 11902 "POP EDX\n\t"
11882 11903 "CMP EDX,0x80000000\n\t"
11883 11904 "JNE,s fast\n\t"
11884 11905 "TEST EAX,EAX\n\t"
11885 11906 "JNE,s fast\n\t"
11886 11907 "FLD $src\n\t"
11887 11908 "CALL d2l_wrapper\n"
11888 11909 "fast:" %}
11889 11910 ins_encode( Push_Reg_D(src), D2L_encoding(src) );
11890 11911 ins_pipe( pipe_slow );
11891 11912 %}
11892 11913
11893 11914 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11894 11915 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11895 11916 predicate (UseSSE>=2);
11896 11917 match(Set dst (ConvD2L src));
11897 11918 effect( KILL cr );
11898 11919 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11899 11920 "MOVSD [ESP],$src\n\t"
11900 11921 "FLD_D [ESP]\n\t"
11901 11922 "FLDCW trunc mode\n\t"
11902 11923 "FISTp [ESP + #0]\n\t"
11903 11924 "FLDCW std/24-bit mode\n\t"
11904 11925 "POP EAX\n\t"
11905 11926 "POP EDX\n\t"
11906 11927 "CMP EDX,0x80000000\n\t"
11907 11928 "JNE,s fast\n\t"
11908 11929 "TEST EAX,EAX\n\t"
11909 11930 "JNE,s fast\n\t"
11910 11931 "SUB ESP,8\n\t"
11911 11932 "MOVSD [ESP],$src\n\t"
11912 11933 "FLD_D [ESP]\n\t"
11913 11934 "CALL d2l_wrapper\n"
11914 11935 "fast:" %}
11915 11936 ins_encode( XD2L_encoding(src) );
11916 11937 ins_pipe( pipe_slow );
11917 11938 %}
11918 11939
11919 11940 // Convert a double to an int. Java semantics require we do complex
11920 11941 // manglations in the corner cases. So we set the rounding mode to
11921 11942 // 'zero', store the darned double down as an int, and reset the
11922 11943 // rounding mode to 'nearest'. The hardware stores a flag value down
11923 11944 // if we would overflow or converted a NAN; we check for this and
11924 11945 // and go the slow path if needed.
11925 11946 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11926 11947 predicate(UseSSE==0);
11927 11948 match(Set dst (ConvF2I src));
11928 11949 effect( KILL tmp, KILL cr );
11929 11950 format %{ "FLD $src\t# Convert float to int \n\t"
11930 11951 "FLDCW trunc mode\n\t"
11931 11952 "SUB ESP,4\n\t"
11932 11953 "FISTp [ESP + #0]\n\t"
11933 11954 "FLDCW std/24-bit mode\n\t"
11934 11955 "POP EAX\n\t"
11935 11956 "CMP EAX,0x80000000\n\t"
11936 11957 "JNE,s fast\n\t"
11937 11958 "FLD $src\n\t"
11938 11959 "CALL d2i_wrapper\n"
11939 11960 "fast:" %}
11940 11961 // D2I_encoding works for F2I
11941 11962 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11942 11963 ins_pipe( pipe_slow );
11943 11964 %}
11944 11965
11945 11966 // Convert a float in xmm to an int reg.
11946 11967 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11947 11968 predicate(UseSSE>=1);
11948 11969 match(Set dst (ConvF2I src));
11949 11970 effect( KILL tmp, KILL cr );
11950 11971 format %{ "CVTTSS2SI $dst, $src\n\t"
11951 11972 "CMP $dst,0x80000000\n\t"
11952 11973 "JNE,s fast\n\t"
11953 11974 "SUB ESP, 4\n\t"
11954 11975 "MOVSS [ESP], $src\n\t"
11955 11976 "FLD [ESP]\n\t"
11956 11977 "ADD ESP, 4\n\t"
11957 11978 "CALL d2i_wrapper\n"
11958 11979 "fast:" %}
11959 11980 opcode(0x0); // single-precision conversion
11960 11981 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
11961 11982 ins_pipe( pipe_slow );
11962 11983 %}
11963 11984
11964 11985 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11965 11986 predicate(UseSSE==0);
11966 11987 match(Set dst (ConvF2L src));
11967 11988 effect( KILL cr );
11968 11989 format %{ "FLD $src\t# Convert float to long\n\t"
11969 11990 "FLDCW trunc mode\n\t"
11970 11991 "SUB ESP,8\n\t"
11971 11992 "FISTp [ESP + #0]\n\t"
11972 11993 "FLDCW std/24-bit mode\n\t"
11973 11994 "POP EAX\n\t"
11974 11995 "POP EDX\n\t"
11975 11996 "CMP EDX,0x80000000\n\t"
11976 11997 "JNE,s fast\n\t"
11977 11998 "TEST EAX,EAX\n\t"
11978 11999 "JNE,s fast\n\t"
11979 12000 "FLD $src\n\t"
11980 12001 "CALL d2l_wrapper\n"
11981 12002 "fast:" %}
11982 12003 // D2L_encoding works for F2L
11983 12004 ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11984 12005 ins_pipe( pipe_slow );
11985 12006 %}
11986 12007
11987 12008 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11988 12009 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
11989 12010 predicate (UseSSE>=1);
11990 12011 match(Set dst (ConvF2L src));
11991 12012 effect( KILL cr );
11992 12013 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11993 12014 "MOVSS [ESP],$src\n\t"
11994 12015 "FLD_S [ESP]\n\t"
11995 12016 "FLDCW trunc mode\n\t"
11996 12017 "FISTp [ESP + #0]\n\t"
11997 12018 "FLDCW std/24-bit mode\n\t"
11998 12019 "POP EAX\n\t"
11999 12020 "POP EDX\n\t"
12000 12021 "CMP EDX,0x80000000\n\t"
12001 12022 "JNE,s fast\n\t"
12002 12023 "TEST EAX,EAX\n\t"
12003 12024 "JNE,s fast\n\t"
12004 12025 "SUB ESP,4\t# Convert float to long\n\t"
12005 12026 "MOVSS [ESP],$src\n\t"
12006 12027 "FLD_S [ESP]\n\t"
12007 12028 "ADD ESP,4\n\t"
12008 12029 "CALL d2l_wrapper\n"
12009 12030 "fast:" %}
12010 12031 ins_encode( X2L_encoding(src) );
12011 12032 ins_pipe( pipe_slow );
12012 12033 %}
12013 12034
12014 12035 instruct convI2D_reg(regD dst, stackSlotI src) %{
12015 12036 predicate( UseSSE<=1 );
12016 12037 match(Set dst (ConvI2D src));
12017 12038 format %{ "FILD $src\n\t"
12018 12039 "FSTP $dst" %}
12019 12040 opcode(0xDB, 0x0); /* DB /0 */
12020 12041 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
12021 12042 ins_pipe( fpu_reg_mem );
12022 12043 %}
12023 12044
12024 12045 instruct convI2XD_reg(regXD dst, eRegI src) %{
12025 12046 predicate( UseSSE>=2 && !UseXmmI2D );
12026 12047 match(Set dst (ConvI2D src));
12027 12048 format %{ "CVTSI2SD $dst,$src" %}
12028 12049 opcode(0xF2, 0x0F, 0x2A);
12029 12050 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12030 12051 ins_pipe( pipe_slow );
12031 12052 %}
12032 12053
12033 12054 instruct convI2XD_mem(regXD dst, memory mem) %{
12034 12055 predicate( UseSSE>=2 );
12035 12056 match(Set dst (ConvI2D (LoadI mem)));
12036 12057 format %{ "CVTSI2SD $dst,$mem" %}
12037 12058 opcode(0xF2, 0x0F, 0x2A);
12038 12059 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
12039 12060 ins_pipe( pipe_slow );
12040 12061 %}
12041 12062
12042 12063 instruct convXI2XD_reg(regXD dst, eRegI src)
12043 12064 %{
12044 12065 predicate( UseSSE>=2 && UseXmmI2D );
12045 12066 match(Set dst (ConvI2D src));
12046 12067
12047 12068 format %{ "MOVD $dst,$src\n\t"
12048 12069 "CVTDQ2PD $dst,$dst\t# i2d" %}
12049 12070 ins_encode %{
12050 12071 __ movdl($dst$$XMMRegister, $src$$Register);
12051 12072 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12052 12073 %}
12053 12074 ins_pipe(pipe_slow); // XXX
12054 12075 %}
12055 12076
12056 12077 instruct convI2D_mem(regD dst, memory mem) %{
12057 12078 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12058 12079 match(Set dst (ConvI2D (LoadI mem)));
12059 12080 format %{ "FILD $mem\n\t"
12060 12081 "FSTP $dst" %}
12061 12082 opcode(0xDB); /* DB /0 */
12062 12083 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12063 12084 Pop_Reg_D(dst));
12064 12085 ins_pipe( fpu_reg_mem );
12065 12086 %}
12066 12087
12067 12088 // Convert a byte to a float; no rounding step needed.
12068 12089 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
12069 12090 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12070 12091 match(Set dst (ConvI2F src));
12071 12092 format %{ "FILD $src\n\t"
12072 12093 "FSTP $dst" %}
12073 12094
12074 12095 opcode(0xDB, 0x0); /* DB /0 */
12075 12096 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
12076 12097 ins_pipe( fpu_reg_mem );
12077 12098 %}
12078 12099
12079 12100 // In 24-bit mode, force exponent rounding by storing back out
12080 12101 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
12081 12102 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12082 12103 match(Set dst (ConvI2F src));
12083 12104 ins_cost(200);
12084 12105 format %{ "FILD $src\n\t"
12085 12106 "FSTP_S $dst" %}
12086 12107 opcode(0xDB, 0x0); /* DB /0 */
12087 12108 ins_encode( Push_Mem_I(src),
12088 12109 Pop_Mem_F(dst));
12089 12110 ins_pipe( fpu_mem_mem );
12090 12111 %}
12091 12112
12092 12113 // In 24-bit mode, force exponent rounding by storing back out
12093 12114 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
12094 12115 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12095 12116 match(Set dst (ConvI2F (LoadI mem)));
12096 12117 ins_cost(200);
12097 12118 format %{ "FILD $mem\n\t"
12098 12119 "FSTP_S $dst" %}
12099 12120 opcode(0xDB); /* DB /0 */
12100 12121 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12101 12122 Pop_Mem_F(dst));
12102 12123 ins_pipe( fpu_mem_mem );
12103 12124 %}
12104 12125
12105 12126 // This instruction does not round to 24-bits
12106 12127 instruct convI2F_reg(regF dst, stackSlotI src) %{
12107 12128 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12108 12129 match(Set dst (ConvI2F src));
12109 12130 format %{ "FILD $src\n\t"
12110 12131 "FSTP $dst" %}
12111 12132 opcode(0xDB, 0x0); /* DB /0 */
12112 12133 ins_encode( Push_Mem_I(src),
12113 12134 Pop_Reg_F(dst));
12114 12135 ins_pipe( fpu_reg_mem );
12115 12136 %}
12116 12137
12117 12138 // This instruction does not round to 24-bits
12118 12139 instruct convI2F_mem(regF dst, memory mem) %{
12119 12140 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12120 12141 match(Set dst (ConvI2F (LoadI mem)));
12121 12142 format %{ "FILD $mem\n\t"
12122 12143 "FSTP $dst" %}
12123 12144 opcode(0xDB); /* DB /0 */
12124 12145 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12125 12146 Pop_Reg_F(dst));
12126 12147 ins_pipe( fpu_reg_mem );
12127 12148 %}
12128 12149
12129 12150 // Convert an int to a float in xmm; no rounding step needed.
12130 12151 instruct convI2X_reg(regX dst, eRegI src) %{
12131 12152 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
12132 12153 match(Set dst (ConvI2F src));
12133 12154 format %{ "CVTSI2SS $dst, $src" %}
12134 12155
12135 12156 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
12136 12157 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12137 12158 ins_pipe( pipe_slow );
12138 12159 %}
12139 12160
12140 12161 instruct convXI2X_reg(regX dst, eRegI src)
12141 12162 %{
12142 12163 predicate( UseSSE>=2 && UseXmmI2F );
12143 12164 match(Set dst (ConvI2F src));
12144 12165
12145 12166 format %{ "MOVD $dst,$src\n\t"
12146 12167 "CVTDQ2PS $dst,$dst\t# i2f" %}
12147 12168 ins_encode %{
12148 12169 __ movdl($dst$$XMMRegister, $src$$Register);
12149 12170 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
12150 12171 %}
12151 12172 ins_pipe(pipe_slow); // XXX
12152 12173 %}
12153 12174
12154 12175 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12155 12176 match(Set dst (ConvI2L src));
12156 12177 effect(KILL cr);
12157 12178 ins_cost(375);
12158 12179 format %{ "MOV $dst.lo,$src\n\t"
12159 12180 "MOV $dst.hi,$src\n\t"
12160 12181 "SAR $dst.hi,31" %}
12161 12182 ins_encode(convert_int_long(dst,src));
12162 12183 ins_pipe( ialu_reg_reg_long );
12163 12184 %}
12164 12185
12165 12186 // Zero-extend convert int to long
12166 12187 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
12167 12188 match(Set dst (AndL (ConvI2L src) mask) );
12168 12189 effect( KILL flags );
12169 12190 ins_cost(250);
12170 12191 format %{ "MOV $dst.lo,$src\n\t"
12171 12192 "XOR $dst.hi,$dst.hi" %}
12172 12193 opcode(0x33); // XOR
12173 12194 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12174 12195 ins_pipe( ialu_reg_reg_long );
12175 12196 %}
12176 12197
12177 12198 // Zero-extend long
12178 12199 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
12179 12200 match(Set dst (AndL src mask) );
12180 12201 effect( KILL flags );
12181 12202 ins_cost(250);
12182 12203 format %{ "MOV $dst.lo,$src.lo\n\t"
12183 12204 "XOR $dst.hi,$dst.hi\n\t" %}
12184 12205 opcode(0x33); // XOR
12185 12206 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12186 12207 ins_pipe( ialu_reg_reg_long );
12187 12208 %}
12188 12209
12189 12210 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
12190 12211 predicate (UseSSE<=1);
12191 12212 match(Set dst (ConvL2D src));
12192 12213 effect( KILL cr );
12193 12214 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12194 12215 "PUSH $src.lo\n\t"
12195 12216 "FILD ST,[ESP + #0]\n\t"
12196 12217 "ADD ESP,8\n\t"
12197 12218 "FSTP_D $dst\t# D-round" %}
12198 12219 opcode(0xDF, 0x5); /* DF /5 */
12199 12220 ins_encode(convert_long_double(src), Pop_Mem_D(dst));
12200 12221 ins_pipe( pipe_slow );
12201 12222 %}
12202 12223
12203 12224 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
12204 12225 predicate (UseSSE>=2);
12205 12226 match(Set dst (ConvL2D src));
12206 12227 effect( KILL cr );
12207 12228 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12208 12229 "PUSH $src.lo\n\t"
12209 12230 "FILD_D [ESP]\n\t"
12210 12231 "FSTP_D [ESP]\n\t"
12211 12232 "MOVSD $dst,[ESP]\n\t"
12212 12233 "ADD ESP,8" %}
12213 12234 opcode(0xDF, 0x5); /* DF /5 */
12214 12235 ins_encode(convert_long_double2(src), Push_ResultXD(dst));
12215 12236 ins_pipe( pipe_slow );
12216 12237 %}
12217 12238
12218 12239 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
12219 12240 predicate (UseSSE>=1);
12220 12241 match(Set dst (ConvL2F src));
12221 12242 effect( KILL cr );
12222 12243 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12223 12244 "PUSH $src.lo\n\t"
12224 12245 "FILD_D [ESP]\n\t"
12225 12246 "FSTP_S [ESP]\n\t"
12226 12247 "MOVSS $dst,[ESP]\n\t"
12227 12248 "ADD ESP,8" %}
12228 12249 opcode(0xDF, 0x5); /* DF /5 */
12229 12250 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
12230 12251 ins_pipe( pipe_slow );
12231 12252 %}
12232 12253
12233 12254 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
12234 12255 match(Set dst (ConvL2F src));
12235 12256 effect( KILL cr );
12236 12257 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12237 12258 "PUSH $src.lo\n\t"
12238 12259 "FILD ST,[ESP + #0]\n\t"
12239 12260 "ADD ESP,8\n\t"
12240 12261 "FSTP_S $dst\t# F-round" %}
12241 12262 opcode(0xDF, 0x5); /* DF /5 */
12242 12263 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12243 12264 ins_pipe( pipe_slow );
12244 12265 %}
12245 12266
12246 12267 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12247 12268 match(Set dst (ConvL2I src));
12248 12269 effect( DEF dst, USE src );
12249 12270 format %{ "MOV $dst,$src.lo" %}
12250 12271 ins_encode(enc_CopyL_Lo(dst,src));
12251 12272 ins_pipe( ialu_reg_reg );
12252 12273 %}
12253 12274
12254 12275
12255 12276 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12256 12277 match(Set dst (MoveF2I src));
12257 12278 effect( DEF dst, USE src );
12258 12279 ins_cost(100);
12259 12280 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12260 12281 opcode(0x8B);
12261 12282 ins_encode( OpcP, RegMem(dst,src));
12262 12283 ins_pipe( ialu_reg_mem );
12263 12284 %}
12264 12285
12265 12286 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12266 12287 predicate(UseSSE==0);
12267 12288 match(Set dst (MoveF2I src));
12268 12289 effect( DEF dst, USE src );
12269 12290
12270 12291 ins_cost(125);
12271 12292 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12272 12293 ins_encode( Pop_Mem_Reg_F(dst, src) );
12273 12294 ins_pipe( fpu_mem_reg );
12274 12295 %}
12275 12296
12276 12297 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12277 12298 predicate(UseSSE>=1);
12278 12299 match(Set dst (MoveF2I src));
12279 12300 effect( DEF dst, USE src );
12280 12301
12281 12302 ins_cost(95);
12282 12303 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12283 12304 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
12284 12305 ins_pipe( pipe_slow );
12285 12306 %}
12286 12307
12287 12308 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12288 12309 predicate(UseSSE>=2);
12289 12310 match(Set dst (MoveF2I src));
12290 12311 effect( DEF dst, USE src );
12291 12312 ins_cost(85);
12292 12313 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12293 12314 ins_encode( MovX2I_reg(dst, src));
12294 12315 ins_pipe( pipe_slow );
12295 12316 %}
12296 12317
12297 12318 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12298 12319 match(Set dst (MoveI2F src));
12299 12320 effect( DEF dst, USE src );
12300 12321
12301 12322 ins_cost(100);
12302 12323 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12303 12324 opcode(0x89);
12304 12325 ins_encode( OpcPRegSS( dst, src ) );
12305 12326 ins_pipe( ialu_mem_reg );
12306 12327 %}
12307 12328
12308 12329
12309 12330 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12310 12331 predicate(UseSSE==0);
12311 12332 match(Set dst (MoveI2F src));
12312 12333 effect(DEF dst, USE src);
12313 12334
12314 12335 ins_cost(125);
12315 12336 format %{ "FLD_S $src\n\t"
12316 12337 "FSTP $dst\t# MoveI2F_stack_reg" %}
12317 12338 opcode(0xD9); /* D9 /0, FLD m32real */
12318 12339 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12319 12340 Pop_Reg_F(dst) );
12320 12341 ins_pipe( fpu_reg_mem );
12321 12342 %}
12322 12343
12323 12344 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12324 12345 predicate(UseSSE>=1);
12325 12346 match(Set dst (MoveI2F src));
12326 12347 effect( DEF dst, USE src );
12327 12348
12328 12349 ins_cost(95);
12329 12350 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12330 12351 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12331 12352 ins_pipe( pipe_slow );
12332 12353 %}
12333 12354
12334 12355 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12335 12356 predicate(UseSSE>=2);
12336 12357 match(Set dst (MoveI2F src));
12337 12358 effect( DEF dst, USE src );
12338 12359
12339 12360 ins_cost(85);
12340 12361 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12341 12362 ins_encode( MovI2X_reg(dst, src) );
12342 12363 ins_pipe( pipe_slow );
12343 12364 %}
12344 12365
12345 12366 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12346 12367 match(Set dst (MoveD2L src));
12347 12368 effect(DEF dst, USE src);
12348 12369
12349 12370 ins_cost(250);
12350 12371 format %{ "MOV $dst.lo,$src\n\t"
12351 12372 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12352 12373 opcode(0x8B, 0x8B);
12353 12374 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12354 12375 ins_pipe( ialu_mem_long_reg );
12355 12376 %}
12356 12377
12357 12378 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12358 12379 predicate(UseSSE<=1);
12359 12380 match(Set dst (MoveD2L src));
12360 12381 effect(DEF dst, USE src);
12361 12382
12362 12383 ins_cost(125);
12363 12384 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12364 12385 ins_encode( Pop_Mem_Reg_D(dst, src) );
12365 12386 ins_pipe( fpu_mem_reg );
12366 12387 %}
12367 12388
12368 12389 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12369 12390 predicate(UseSSE>=2);
12370 12391 match(Set dst (MoveD2L src));
12371 12392 effect(DEF dst, USE src);
12372 12393 ins_cost(95);
12373 12394
12374 12395 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12375 12396 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12376 12397 ins_pipe( pipe_slow );
12377 12398 %}
12378 12399
12379 12400 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12380 12401 predicate(UseSSE>=2);
12381 12402 match(Set dst (MoveD2L src));
12382 12403 effect(DEF dst, USE src, TEMP tmp);
12383 12404 ins_cost(85);
12384 12405 format %{ "MOVD $dst.lo,$src\n\t"
12385 12406 "PSHUFLW $tmp,$src,0x4E\n\t"
12386 12407 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12387 12408 ins_encode( MovXD2L_reg(dst, src, tmp) );
12388 12409 ins_pipe( pipe_slow );
12389 12410 %}
12390 12411
12391 12412 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12392 12413 match(Set dst (MoveL2D src));
12393 12414 effect(DEF dst, USE src);
12394 12415
12395 12416 ins_cost(200);
12396 12417 format %{ "MOV $dst,$src.lo\n\t"
12397 12418 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12398 12419 opcode(0x89, 0x89);
12399 12420 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12400 12421 ins_pipe( ialu_mem_long_reg );
12401 12422 %}
12402 12423
12403 12424
12404 12425 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12405 12426 predicate(UseSSE<=1);
12406 12427 match(Set dst (MoveL2D src));
12407 12428 effect(DEF dst, USE src);
12408 12429 ins_cost(125);
12409 12430
12410 12431 format %{ "FLD_D $src\n\t"
12411 12432 "FSTP $dst\t# MoveL2D_stack_reg" %}
12412 12433 opcode(0xDD); /* DD /0, FLD m64real */
12413 12434 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12414 12435 Pop_Reg_D(dst) );
12415 12436 ins_pipe( fpu_reg_mem );
12416 12437 %}
12417 12438
12418 12439
12419 12440 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12420 12441 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12421 12442 match(Set dst (MoveL2D src));
12422 12443 effect(DEF dst, USE src);
12423 12444
12424 12445 ins_cost(95);
12425 12446 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12426 12447 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12427 12448 ins_pipe( pipe_slow );
12428 12449 %}
12429 12450
12430 12451 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12431 12452 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12432 12453 match(Set dst (MoveL2D src));
12433 12454 effect(DEF dst, USE src);
12434 12455
12435 12456 ins_cost(95);
12436 12457 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12437 12458 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12438 12459 ins_pipe( pipe_slow );
12439 12460 %}
12440 12461
12441 12462 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12442 12463 predicate(UseSSE>=2);
12443 12464 match(Set dst (MoveL2D src));
12444 12465 effect(TEMP dst, USE src, TEMP tmp);
12445 12466 ins_cost(85);
12446 12467 format %{ "MOVD $dst,$src.lo\n\t"
12447 12468 "MOVD $tmp,$src.hi\n\t"
12448 12469 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12449 12470 ins_encode( MovL2XD_reg(dst, src, tmp) );
12450 12471 ins_pipe( pipe_slow );
12451 12472 %}
12452 12473
12453 12474 // Replicate scalar to packed byte (1 byte) values in xmm
12454 12475 instruct Repl8B_reg(regXD dst, regXD src) %{
12455 12476 predicate(UseSSE>=2);
12456 12477 match(Set dst (Replicate8B src));
12457 12478 format %{ "MOVDQA $dst,$src\n\t"
12458 12479 "PUNPCKLBW $dst,$dst\n\t"
12459 12480 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12460 12481 ins_encode( pshufd_8x8(dst, src));
12461 12482 ins_pipe( pipe_slow );
12462 12483 %}
12463 12484
12464 12485 // Replicate scalar to packed byte (1 byte) values in xmm
12465 12486 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12466 12487 predicate(UseSSE>=2);
12467 12488 match(Set dst (Replicate8B src));
12468 12489 format %{ "MOVD $dst,$src\n\t"
12469 12490 "PUNPCKLBW $dst,$dst\n\t"
12470 12491 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12471 12492 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12472 12493 ins_pipe( pipe_slow );
12473 12494 %}
12474 12495
12475 12496 // Replicate scalar zero to packed byte (1 byte) values in xmm
12476 12497 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12477 12498 predicate(UseSSE>=2);
12478 12499 match(Set dst (Replicate8B zero));
12479 12500 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12480 12501 ins_encode( pxor(dst, dst));
12481 12502 ins_pipe( fpu_reg_reg );
12482 12503 %}
12483 12504
12484 12505 // Replicate scalar to packed shore (2 byte) values in xmm
12485 12506 instruct Repl4S_reg(regXD dst, regXD src) %{
12486 12507 predicate(UseSSE>=2);
12487 12508 match(Set dst (Replicate4S src));
12488 12509 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12489 12510 ins_encode( pshufd_4x16(dst, src));
12490 12511 ins_pipe( fpu_reg_reg );
12491 12512 %}
12492 12513
12493 12514 // Replicate scalar to packed shore (2 byte) values in xmm
12494 12515 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12495 12516 predicate(UseSSE>=2);
12496 12517 match(Set dst (Replicate4S src));
12497 12518 format %{ "MOVD $dst,$src\n\t"
12498 12519 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12499 12520 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12500 12521 ins_pipe( fpu_reg_reg );
12501 12522 %}
12502 12523
12503 12524 // Replicate scalar zero to packed short (2 byte) values in xmm
12504 12525 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12505 12526 predicate(UseSSE>=2);
12506 12527 match(Set dst (Replicate4S zero));
12507 12528 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12508 12529 ins_encode( pxor(dst, dst));
12509 12530 ins_pipe( fpu_reg_reg );
12510 12531 %}
12511 12532
12512 12533 // Replicate scalar to packed char (2 byte) values in xmm
12513 12534 instruct Repl4C_reg(regXD dst, regXD src) %{
12514 12535 predicate(UseSSE>=2);
12515 12536 match(Set dst (Replicate4C src));
12516 12537 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12517 12538 ins_encode( pshufd_4x16(dst, src));
12518 12539 ins_pipe( fpu_reg_reg );
12519 12540 %}
12520 12541
12521 12542 // Replicate scalar to packed char (2 byte) values in xmm
12522 12543 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12523 12544 predicate(UseSSE>=2);
12524 12545 match(Set dst (Replicate4C src));
12525 12546 format %{ "MOVD $dst,$src\n\t"
12526 12547 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12527 12548 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12528 12549 ins_pipe( fpu_reg_reg );
12529 12550 %}
12530 12551
12531 12552 // Replicate scalar zero to packed char (2 byte) values in xmm
12532 12553 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12533 12554 predicate(UseSSE>=2);
12534 12555 match(Set dst (Replicate4C zero));
12535 12556 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12536 12557 ins_encode( pxor(dst, dst));
12537 12558 ins_pipe( fpu_reg_reg );
12538 12559 %}
12539 12560
12540 12561 // Replicate scalar to packed integer (4 byte) values in xmm
12541 12562 instruct Repl2I_reg(regXD dst, regXD src) %{
12542 12563 predicate(UseSSE>=2);
12543 12564 match(Set dst (Replicate2I src));
12544 12565 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12545 12566 ins_encode( pshufd(dst, src, 0x00));
12546 12567 ins_pipe( fpu_reg_reg );
12547 12568 %}
12548 12569
12549 12570 // Replicate scalar to packed integer (4 byte) values in xmm
12550 12571 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12551 12572 predicate(UseSSE>=2);
12552 12573 match(Set dst (Replicate2I src));
12553 12574 format %{ "MOVD $dst,$src\n\t"
12554 12575 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12555 12576 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12556 12577 ins_pipe( fpu_reg_reg );
12557 12578 %}
12558 12579
12559 12580 // Replicate scalar zero to packed integer (2 byte) values in xmm
12560 12581 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12561 12582 predicate(UseSSE>=2);
12562 12583 match(Set dst (Replicate2I zero));
12563 12584 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12564 12585 ins_encode( pxor(dst, dst));
12565 12586 ins_pipe( fpu_reg_reg );
12566 12587 %}
12567 12588
12568 12589 // Replicate scalar to packed single precision floating point values in xmm
12569 12590 instruct Repl2F_reg(regXD dst, regXD src) %{
12570 12591 predicate(UseSSE>=2);
12571 12592 match(Set dst (Replicate2F src));
12572 12593 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12573 12594 ins_encode( pshufd(dst, src, 0xe0));
12574 12595 ins_pipe( fpu_reg_reg );
12575 12596 %}
12576 12597
12577 12598 // Replicate scalar to packed single precision floating point values in xmm
12578 12599 instruct Repl2F_regX(regXD dst, regX src) %{
12579 12600 predicate(UseSSE>=2);
12580 12601 match(Set dst (Replicate2F src));
12581 12602 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12582 12603 ins_encode( pshufd(dst, src, 0xe0));
12583 12604 ins_pipe( fpu_reg_reg );
12584 12605 %}
12585 12606
12586 12607 // Replicate scalar to packed single precision floating point values in xmm
12587 12608 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12588 12609 predicate(UseSSE>=2);
12589 12610 match(Set dst (Replicate2F zero));
12590 12611 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12591 12612 ins_encode( pxor(dst, dst));
12592 12613 ins_pipe( fpu_reg_reg );
12593 12614 %}
12594 12615
12595 12616 // =======================================================================
12596 12617 // fast clearing of an array
12597 12618 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12598 12619 match(Set dummy (ClearArray cnt base));
12599 12620 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12600 12621 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12601 12622 "XOR EAX,EAX\n\t"
12602 12623 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12603 12624 opcode(0,0x4);
12604 12625 ins_encode( Opcode(0xD1), RegOpc(ECX),
12605 12626 OpcRegReg(0x33,EAX,EAX),
12606 12627 Opcode(0xF3), Opcode(0xAB) );
12607 12628 ins_pipe( pipe_slow );
12608 12629 %}
12609 12630
12610 12631 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12611 12632 eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
12612 12633 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12613 12634 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12614 12635
12615 12636 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1, $tmp2" %}
12616 12637 ins_encode %{
12617 12638 __ string_compare($str1$$Register, $str2$$Register,
12618 12639 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12619 12640 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12620 12641 %}
12621 12642 ins_pipe( pipe_slow );
12622 12643 %}
12623 12644
12624 12645 // fast string equals
12625 12646 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12626 12647 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12627 12648 match(Set result (StrEquals (Binary str1 str2) cnt));
12628 12649 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12629 12650
12630 12651 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12631 12652 ins_encode %{
12632 12653 __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12633 12654 $cnt$$Register, $result$$Register, $tmp3$$Register,
12634 12655 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12635 12656 %}
12636 12657 ins_pipe( pipe_slow );
12637 12658 %}
12638 12659
12639 12660 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12640 12661 eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12641 12662 predicate(UseSSE42Intrinsics);
12642 12663 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12643 12664 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12644 12665
12645 12666 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp2, $tmp1" %}
12646 12667 ins_encode %{
12647 12668 __ string_indexof($str1$$Register, $str2$$Register,
12648 12669 $cnt1$$Register, $cnt2$$Register, $result$$Register,
12649 12670 $tmp1$$XMMRegister, $tmp2$$Register);
12650 12671 %}
12651 12672 ins_pipe( pipe_slow );
12652 12673 %}
12653 12674
12654 12675 // fast array equals
12655 12676 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12656 12677 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12657 12678 %{
12658 12679 match(Set result (AryEq ary1 ary2));
12659 12680 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12660 12681 //ins_cost(300);
12661 12682
12662 12683 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12663 12684 ins_encode %{
12664 12685 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12665 12686 $tmp3$$Register, $result$$Register, $tmp4$$Register,
12666 12687 $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12667 12688 %}
12668 12689 ins_pipe( pipe_slow );
12669 12690 %}
12670 12691
12671 12692 //----------Control Flow Instructions------------------------------------------
12672 12693 // Signed compare Instructions
12673 12694 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12674 12695 match(Set cr (CmpI op1 op2));
12675 12696 effect( DEF cr, USE op1, USE op2 );
12676 12697 format %{ "CMP $op1,$op2" %}
12677 12698 opcode(0x3B); /* Opcode 3B /r */
12678 12699 ins_encode( OpcP, RegReg( op1, op2) );
12679 12700 ins_pipe( ialu_cr_reg_reg );
12680 12701 %}
12681 12702
12682 12703 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12683 12704 match(Set cr (CmpI op1 op2));
12684 12705 effect( DEF cr, USE op1 );
12685 12706 format %{ "CMP $op1,$op2" %}
12686 12707 opcode(0x81,0x07); /* Opcode 81 /7 */
12687 12708 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12688 12709 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12689 12710 ins_pipe( ialu_cr_reg_imm );
12690 12711 %}
12691 12712
12692 12713 // Cisc-spilled version of cmpI_eReg
12693 12714 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12694 12715 match(Set cr (CmpI op1 (LoadI op2)));
12695 12716
12696 12717 format %{ "CMP $op1,$op2" %}
12697 12718 ins_cost(500);
12698 12719 opcode(0x3B); /* Opcode 3B /r */
12699 12720 ins_encode( OpcP, RegMem( op1, op2) );
12700 12721 ins_pipe( ialu_cr_reg_mem );
12701 12722 %}
12702 12723
12703 12724 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12704 12725 match(Set cr (CmpI src zero));
12705 12726 effect( DEF cr, USE src );
12706 12727
12707 12728 format %{ "TEST $src,$src" %}
12708 12729 opcode(0x85);
12709 12730 ins_encode( OpcP, RegReg( src, src ) );
12710 12731 ins_pipe( ialu_cr_reg_imm );
12711 12732 %}
12712 12733
12713 12734 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12714 12735 match(Set cr (CmpI (AndI src con) zero));
12715 12736
12716 12737 format %{ "TEST $src,$con" %}
12717 12738 opcode(0xF7,0x00);
12718 12739 ins_encode( OpcP, RegOpc(src), Con32(con) );
12719 12740 ins_pipe( ialu_cr_reg_imm );
12720 12741 %}
12721 12742
12722 12743 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12723 12744 match(Set cr (CmpI (AndI src mem) zero));
12724 12745
12725 12746 format %{ "TEST $src,$mem" %}
12726 12747 opcode(0x85);
12727 12748 ins_encode( OpcP, RegMem( src, mem ) );
12728 12749 ins_pipe( ialu_cr_reg_mem );
12729 12750 %}
12730 12751
12731 12752 // Unsigned compare Instructions; really, same as signed except they
12732 12753 // produce an eFlagsRegU instead of eFlagsReg.
12733 12754 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12734 12755 match(Set cr (CmpU op1 op2));
12735 12756
12736 12757 format %{ "CMPu $op1,$op2" %}
12737 12758 opcode(0x3B); /* Opcode 3B /r */
12738 12759 ins_encode( OpcP, RegReg( op1, op2) );
12739 12760 ins_pipe( ialu_cr_reg_reg );
12740 12761 %}
12741 12762
12742 12763 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12743 12764 match(Set cr (CmpU op1 op2));
12744 12765
12745 12766 format %{ "CMPu $op1,$op2" %}
12746 12767 opcode(0x81,0x07); /* Opcode 81 /7 */
12747 12768 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12748 12769 ins_pipe( ialu_cr_reg_imm );
12749 12770 %}
12750 12771
12751 12772 // // Cisc-spilled version of cmpU_eReg
12752 12773 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12753 12774 match(Set cr (CmpU op1 (LoadI op2)));
12754 12775
12755 12776 format %{ "CMPu $op1,$op2" %}
12756 12777 ins_cost(500);
12757 12778 opcode(0x3B); /* Opcode 3B /r */
12758 12779 ins_encode( OpcP, RegMem( op1, op2) );
12759 12780 ins_pipe( ialu_cr_reg_mem );
12760 12781 %}
12761 12782
12762 12783 // // Cisc-spilled version of cmpU_eReg
12763 12784 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12764 12785 // match(Set cr (CmpU (LoadI op1) op2));
12765 12786 //
12766 12787 // format %{ "CMPu $op1,$op2" %}
12767 12788 // ins_cost(500);
12768 12789 // opcode(0x39); /* Opcode 39 /r */
12769 12790 // ins_encode( OpcP, RegMem( op1, op2) );
12770 12791 //%}
12771 12792
12772 12793 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12773 12794 match(Set cr (CmpU src zero));
12774 12795
12775 12796 format %{ "TESTu $src,$src" %}
12776 12797 opcode(0x85);
12777 12798 ins_encode( OpcP, RegReg( src, src ) );
12778 12799 ins_pipe( ialu_cr_reg_imm );
12779 12800 %}
12780 12801
12781 12802 // Unsigned pointer compare Instructions
12782 12803 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12783 12804 match(Set cr (CmpP op1 op2));
12784 12805
12785 12806 format %{ "CMPu $op1,$op2" %}
12786 12807 opcode(0x3B); /* Opcode 3B /r */
12787 12808 ins_encode( OpcP, RegReg( op1, op2) );
12788 12809 ins_pipe( ialu_cr_reg_reg );
12789 12810 %}
12790 12811
12791 12812 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12792 12813 match(Set cr (CmpP op1 op2));
12793 12814
12794 12815 format %{ "CMPu $op1,$op2" %}
12795 12816 opcode(0x81,0x07); /* Opcode 81 /7 */
12796 12817 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12797 12818 ins_pipe( ialu_cr_reg_imm );
12798 12819 %}
12799 12820
12800 12821 // // Cisc-spilled version of cmpP_eReg
12801 12822 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12802 12823 match(Set cr (CmpP op1 (LoadP op2)));
12803 12824
12804 12825 format %{ "CMPu $op1,$op2" %}
12805 12826 ins_cost(500);
12806 12827 opcode(0x3B); /* Opcode 3B /r */
12807 12828 ins_encode( OpcP, RegMem( op1, op2) );
12808 12829 ins_pipe( ialu_cr_reg_mem );
12809 12830 %}
12810 12831
12811 12832 // // Cisc-spilled version of cmpP_eReg
12812 12833 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12813 12834 // match(Set cr (CmpP (LoadP op1) op2));
12814 12835 //
12815 12836 // format %{ "CMPu $op1,$op2" %}
12816 12837 // ins_cost(500);
12817 12838 // opcode(0x39); /* Opcode 39 /r */
12818 12839 // ins_encode( OpcP, RegMem( op1, op2) );
12819 12840 //%}
12820 12841
12821 12842 // Compare raw pointer (used in out-of-heap check).
12822 12843 // Only works because non-oop pointers must be raw pointers
12823 12844 // and raw pointers have no anti-dependencies.
12824 12845 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12825 12846 predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12826 12847 match(Set cr (CmpP op1 (LoadP op2)));
12827 12848
12828 12849 format %{ "CMPu $op1,$op2" %}
12829 12850 opcode(0x3B); /* Opcode 3B /r */
12830 12851 ins_encode( OpcP, RegMem( op1, op2) );
12831 12852 ins_pipe( ialu_cr_reg_mem );
12832 12853 %}
12833 12854
12834 12855 //
12835 12856 // This will generate a signed flags result. This should be ok
12836 12857 // since any compare to a zero should be eq/neq.
12837 12858 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12838 12859 match(Set cr (CmpP src zero));
12839 12860
12840 12861 format %{ "TEST $src,$src" %}
12841 12862 opcode(0x85);
12842 12863 ins_encode( OpcP, RegReg( src, src ) );
12843 12864 ins_pipe( ialu_cr_reg_imm );
12844 12865 %}
12845 12866
12846 12867 // Cisc-spilled version of testP_reg
12847 12868 // This will generate a signed flags result. This should be ok
12848 12869 // since any compare to a zero should be eq/neq.
12849 12870 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12850 12871 match(Set cr (CmpP (LoadP op) zero));
12851 12872
12852 12873 format %{ "TEST $op,0xFFFFFFFF" %}
12853 12874 ins_cost(500);
12854 12875 opcode(0xF7); /* Opcode F7 /0 */
12855 12876 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12856 12877 ins_pipe( ialu_cr_reg_imm );
12857 12878 %}
12858 12879
12859 12880 // Yanked all unsigned pointer compare operations.
12860 12881 // Pointer compares are done with CmpP which is already unsigned.
12861 12882
12862 12883 //----------Max and Min--------------------------------------------------------
12863 12884 // Min Instructions
12864 12885 ////
12865 12886 // *** Min and Max using the conditional move are slower than the
12866 12887 // *** branch version on a Pentium III.
12867 12888 // // Conditional move for min
12868 12889 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12869 12890 // effect( USE_DEF op2, USE op1, USE cr );
12870 12891 // format %{ "CMOVlt $op2,$op1\t! min" %}
12871 12892 // opcode(0x4C,0x0F);
12872 12893 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12873 12894 // ins_pipe( pipe_cmov_reg );
12874 12895 //%}
12875 12896 //
12876 12897 //// Min Register with Register (P6 version)
12877 12898 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12878 12899 // predicate(VM_Version::supports_cmov() );
12879 12900 // match(Set op2 (MinI op1 op2));
12880 12901 // ins_cost(200);
12881 12902 // expand %{
12882 12903 // eFlagsReg cr;
12883 12904 // compI_eReg(cr,op1,op2);
12884 12905 // cmovI_reg_lt(op2,op1,cr);
12885 12906 // %}
12886 12907 //%}
12887 12908
12888 12909 // Min Register with Register (generic version)
12889 12910 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12890 12911 match(Set dst (MinI dst src));
12891 12912 effect(KILL flags);
12892 12913 ins_cost(300);
12893 12914
12894 12915 format %{ "MIN $dst,$src" %}
12895 12916 opcode(0xCC);
12896 12917 ins_encode( min_enc(dst,src) );
12897 12918 ins_pipe( pipe_slow );
12898 12919 %}
12899 12920
12900 12921 // Max Register with Register
12901 12922 // *** Min and Max using the conditional move are slower than the
12902 12923 // *** branch version on a Pentium III.
12903 12924 // // Conditional move for max
12904 12925 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12905 12926 // effect( USE_DEF op2, USE op1, USE cr );
12906 12927 // format %{ "CMOVgt $op2,$op1\t! max" %}
12907 12928 // opcode(0x4F,0x0F);
12908 12929 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12909 12930 // ins_pipe( pipe_cmov_reg );
12910 12931 //%}
12911 12932 //
12912 12933 // // Max Register with Register (P6 version)
12913 12934 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12914 12935 // predicate(VM_Version::supports_cmov() );
12915 12936 // match(Set op2 (MaxI op1 op2));
12916 12937 // ins_cost(200);
12917 12938 // expand %{
12918 12939 // eFlagsReg cr;
12919 12940 // compI_eReg(cr,op1,op2);
12920 12941 // cmovI_reg_gt(op2,op1,cr);
12921 12942 // %}
12922 12943 //%}
12923 12944
12924 12945 // Max Register with Register (generic version)
12925 12946 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12926 12947 match(Set dst (MaxI dst src));
12927 12948 effect(KILL flags);
12928 12949 ins_cost(300);
12929 12950
12930 12951 format %{ "MAX $dst,$src" %}
12931 12952 opcode(0xCC);
↓ open down ↓ |
1337 lines elided |
↑ open up ↑ |
12932 12953 ins_encode( max_enc(dst,src) );
12933 12954 ins_pipe( pipe_slow );
12934 12955 %}
12935 12956
12936 12957 // ============================================================================
12937 12958 // Branch Instructions
12938 12959 // Jump Table
12939 12960 instruct jumpXtnd(eRegI switch_val) %{
12940 12961 match(Jump switch_val);
12941 12962 ins_cost(350);
12942 -
12943 - format %{ "JMP [table_base](,$switch_val,1)\n\t" %}
12944 -
12963 + format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12945 12964 ins_encode %{
12946 - address table_base = __ address_table_constant(_index2label);
12947 -
12948 12965 // Jump to Address(table_base + switch_reg)
12949 - InternalAddress table(table_base);
12950 12966 Address index(noreg, $switch_val$$Register, Address::times_1);
12951 - __ jump(ArrayAddress(table, index));
12967 + __ jump(ArrayAddress($constantaddress, index));
12952 12968 %}
12953 12969 ins_pc_relative(1);
12954 12970 ins_pipe(pipe_jmp);
12955 12971 %}
12956 12972
12957 12973 // Jump Direct - Label defines a relative address from JMP+1
12958 12974 instruct jmpDir(label labl) %{
12959 12975 match(Goto);
12960 12976 effect(USE labl);
12961 12977
12962 12978 ins_cost(300);
12963 12979 format %{ "JMP $labl" %}
12964 12980 size(5);
12965 12981 opcode(0xE9);
12966 12982 ins_encode( OpcP, Lbl( labl ) );
12967 12983 ins_pipe( pipe_jmp );
12968 12984 ins_pc_relative(1);
12969 12985 %}
12970 12986
12971 12987 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12972 12988 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12973 12989 match(If cop cr);
12974 12990 effect(USE labl);
12975 12991
12976 12992 ins_cost(300);
12977 12993 format %{ "J$cop $labl" %}
12978 12994 size(6);
12979 12995 opcode(0x0F, 0x80);
12980 12996 ins_encode( Jcc( cop, labl) );
12981 12997 ins_pipe( pipe_jcc );
12982 12998 ins_pc_relative(1);
12983 12999 %}
12984 13000
12985 13001 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12986 13002 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12987 13003 match(CountedLoopEnd cop cr);
12988 13004 effect(USE labl);
12989 13005
12990 13006 ins_cost(300);
12991 13007 format %{ "J$cop $labl\t# Loop end" %}
12992 13008 size(6);
12993 13009 opcode(0x0F, 0x80);
12994 13010 ins_encode( Jcc( cop, labl) );
12995 13011 ins_pipe( pipe_jcc );
12996 13012 ins_pc_relative(1);
12997 13013 %}
12998 13014
12999 13015 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13000 13016 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13001 13017 match(CountedLoopEnd cop cmp);
13002 13018 effect(USE labl);
13003 13019
13004 13020 ins_cost(300);
13005 13021 format %{ "J$cop,u $labl\t# Loop end" %}
13006 13022 size(6);
13007 13023 opcode(0x0F, 0x80);
13008 13024 ins_encode( Jcc( cop, labl) );
13009 13025 ins_pipe( pipe_jcc );
13010 13026 ins_pc_relative(1);
13011 13027 %}
13012 13028
13013 13029 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13014 13030 match(CountedLoopEnd cop cmp);
13015 13031 effect(USE labl);
13016 13032
13017 13033 ins_cost(200);
13018 13034 format %{ "J$cop,u $labl\t# Loop end" %}
13019 13035 size(6);
13020 13036 opcode(0x0F, 0x80);
13021 13037 ins_encode( Jcc( cop, labl) );
13022 13038 ins_pipe( pipe_jcc );
13023 13039 ins_pc_relative(1);
13024 13040 %}
13025 13041
13026 13042 // Jump Direct Conditional - using unsigned comparison
13027 13043 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13028 13044 match(If cop cmp);
13029 13045 effect(USE labl);
13030 13046
13031 13047 ins_cost(300);
13032 13048 format %{ "J$cop,u $labl" %}
13033 13049 size(6);
13034 13050 opcode(0x0F, 0x80);
13035 13051 ins_encode(Jcc(cop, labl));
13036 13052 ins_pipe(pipe_jcc);
13037 13053 ins_pc_relative(1);
13038 13054 %}
13039 13055
13040 13056 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13041 13057 match(If cop cmp);
13042 13058 effect(USE labl);
13043 13059
13044 13060 ins_cost(200);
13045 13061 format %{ "J$cop,u $labl" %}
13046 13062 size(6);
13047 13063 opcode(0x0F, 0x80);
13048 13064 ins_encode(Jcc(cop, labl));
13049 13065 ins_pipe(pipe_jcc);
13050 13066 ins_pc_relative(1);
13051 13067 %}
13052 13068
13053 13069 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13054 13070 match(If cop cmp);
13055 13071 effect(USE labl);
13056 13072
13057 13073 ins_cost(200);
13058 13074 format %{ $$template
13059 13075 if ($cop$$cmpcode == Assembler::notEqual) {
13060 13076 $$emit$$"JP,u $labl\n\t"
13061 13077 $$emit$$"J$cop,u $labl"
13062 13078 } else {
13063 13079 $$emit$$"JP,u done\n\t"
13064 13080 $$emit$$"J$cop,u $labl\n\t"
13065 13081 $$emit$$"done:"
13066 13082 }
13067 13083 %}
13068 13084 size(12);
13069 13085 opcode(0x0F, 0x80);
13070 13086 ins_encode %{
13071 13087 Label* l = $labl$$label;
13072 13088 $$$emit8$primary;
13073 13089 emit_cc(cbuf, $secondary, Assembler::parity);
13074 13090 int parity_disp = -1;
13075 13091 bool ok = false;
13076 13092 if ($cop$$cmpcode == Assembler::notEqual) {
13077 13093 // the two jumps 6 bytes apart so the jump distances are too
13078 13094 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13079 13095 } else if ($cop$$cmpcode == Assembler::equal) {
13080 13096 parity_disp = 6;
13081 13097 ok = true;
13082 13098 } else {
13083 13099 ShouldNotReachHere();
13084 13100 }
13085 13101 emit_d32(cbuf, parity_disp);
13086 13102 $$$emit8$primary;
13087 13103 emit_cc(cbuf, $secondary, $cop$$cmpcode);
13088 13104 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13089 13105 emit_d32(cbuf, disp);
13090 13106 %}
13091 13107 ins_pipe(pipe_jcc);
13092 13108 ins_pc_relative(1);
13093 13109 %}
13094 13110
13095 13111 // ============================================================================
13096 13112 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
13097 13113 // array for an instance of the superklass. Set a hidden internal cache on a
13098 13114 // hit (cache is checked with exposed code in gen_subtype_check()). Return
13099 13115 // NZ for a miss or zero for a hit. The encoding ALSO sets flags.
13100 13116 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13101 13117 match(Set result (PartialSubtypeCheck sub super));
13102 13118 effect( KILL rcx, KILL cr );
13103 13119
13104 13120 ins_cost(1100); // slightly larger than the next version
13105 13121 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13106 13122 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13107 13123 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13108 13124 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13109 13125 "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
13110 13126 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13111 13127 "XOR $result,$result\t\t Hit: EDI zero\n\t"
13112 13128 "miss:\t" %}
13113 13129
13114 13130 opcode(0x1); // Force a XOR of EDI
13115 13131 ins_encode( enc_PartialSubtypeCheck() );
13116 13132 ins_pipe( pipe_slow );
13117 13133 %}
13118 13134
13119 13135 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13120 13136 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13121 13137 effect( KILL rcx, KILL result );
13122 13138
13123 13139 ins_cost(1000);
13124 13140 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
13125 13141 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13126 13142 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13127 13143 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13128 13144 "JNE,s miss\t\t# Missed: flags NZ\n\t"
13129 13145 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13130 13146 "miss:\t" %}
13131 13147
13132 13148 opcode(0x0); // No need to XOR EDI
13133 13149 ins_encode( enc_PartialSubtypeCheck() );
13134 13150 ins_pipe( pipe_slow );
13135 13151 %}
13136 13152
13137 13153 // ============================================================================
13138 13154 // Branch Instructions -- short offset versions
13139 13155 //
13140 13156 // These instructions are used to replace jumps of a long offset (the default
13141 13157 // match) with jumps of a shorter offset. These instructions are all tagged
13142 13158 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13143 13159 // match rules in general matching. Instead, the ADLC generates a conversion
13144 13160 // method in the MachNode which can be used to do in-place replacement of the
13145 13161 // long variant with the shorter variant. The compiler will determine if a
13146 13162 // branch can be taken by the is_short_branch_offset() predicate in the machine
13147 13163 // specific code section of the file.
13148 13164
13149 13165 // Jump Direct - Label defines a relative address from JMP+1
13150 13166 instruct jmpDir_short(label labl) %{
13151 13167 match(Goto);
13152 13168 effect(USE labl);
13153 13169
13154 13170 ins_cost(300);
13155 13171 format %{ "JMP,s $labl" %}
13156 13172 size(2);
13157 13173 opcode(0xEB);
13158 13174 ins_encode( OpcP, LblShort( labl ) );
13159 13175 ins_pipe( pipe_jmp );
13160 13176 ins_pc_relative(1);
13161 13177 ins_short_branch(1);
13162 13178 %}
13163 13179
13164 13180 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13165 13181 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13166 13182 match(If cop cr);
13167 13183 effect(USE labl);
13168 13184
13169 13185 ins_cost(300);
13170 13186 format %{ "J$cop,s $labl" %}
13171 13187 size(2);
13172 13188 opcode(0x70);
13173 13189 ins_encode( JccShort( cop, labl) );
13174 13190 ins_pipe( pipe_jcc );
13175 13191 ins_pc_relative(1);
13176 13192 ins_short_branch(1);
13177 13193 %}
13178 13194
13179 13195 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13180 13196 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13181 13197 match(CountedLoopEnd cop cr);
13182 13198 effect(USE labl);
13183 13199
13184 13200 ins_cost(300);
13185 13201 format %{ "J$cop,s $labl\t# Loop end" %}
13186 13202 size(2);
13187 13203 opcode(0x70);
13188 13204 ins_encode( JccShort( cop, labl) );
13189 13205 ins_pipe( pipe_jcc );
13190 13206 ins_pc_relative(1);
13191 13207 ins_short_branch(1);
13192 13208 %}
13193 13209
13194 13210 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13195 13211 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13196 13212 match(CountedLoopEnd cop cmp);
13197 13213 effect(USE labl);
13198 13214
13199 13215 ins_cost(300);
13200 13216 format %{ "J$cop,us $labl\t# Loop end" %}
13201 13217 size(2);
13202 13218 opcode(0x70);
13203 13219 ins_encode( JccShort( cop, labl) );
13204 13220 ins_pipe( pipe_jcc );
13205 13221 ins_pc_relative(1);
13206 13222 ins_short_branch(1);
13207 13223 %}
13208 13224
13209 13225 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13210 13226 match(CountedLoopEnd cop cmp);
13211 13227 effect(USE labl);
13212 13228
13213 13229 ins_cost(300);
13214 13230 format %{ "J$cop,us $labl\t# Loop end" %}
13215 13231 size(2);
13216 13232 opcode(0x70);
13217 13233 ins_encode( JccShort( cop, labl) );
13218 13234 ins_pipe( pipe_jcc );
13219 13235 ins_pc_relative(1);
13220 13236 ins_short_branch(1);
13221 13237 %}
13222 13238
13223 13239 // Jump Direct Conditional - using unsigned comparison
13224 13240 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13225 13241 match(If cop cmp);
13226 13242 effect(USE labl);
13227 13243
13228 13244 ins_cost(300);
13229 13245 format %{ "J$cop,us $labl" %}
13230 13246 size(2);
13231 13247 opcode(0x70);
13232 13248 ins_encode( JccShort( cop, labl) );
13233 13249 ins_pipe( pipe_jcc );
13234 13250 ins_pc_relative(1);
13235 13251 ins_short_branch(1);
13236 13252 %}
13237 13253
13238 13254 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13239 13255 match(If cop cmp);
13240 13256 effect(USE labl);
13241 13257
13242 13258 ins_cost(300);
13243 13259 format %{ "J$cop,us $labl" %}
13244 13260 size(2);
13245 13261 opcode(0x70);
13246 13262 ins_encode( JccShort( cop, labl) );
13247 13263 ins_pipe( pipe_jcc );
13248 13264 ins_pc_relative(1);
13249 13265 ins_short_branch(1);
13250 13266 %}
13251 13267
13252 13268 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
13253 13269 match(If cop cmp);
13254 13270 effect(USE labl);
13255 13271
13256 13272 ins_cost(300);
13257 13273 format %{ $$template
13258 13274 if ($cop$$cmpcode == Assembler::notEqual) {
13259 13275 $$emit$$"JP,u,s $labl\n\t"
13260 13276 $$emit$$"J$cop,u,s $labl"
13261 13277 } else {
13262 13278 $$emit$$"JP,u,s done\n\t"
13263 13279 $$emit$$"J$cop,u,s $labl\n\t"
13264 13280 $$emit$$"done:"
13265 13281 }
13266 13282 %}
13267 13283 size(4);
13268 13284 opcode(0x70);
13269 13285 ins_encode %{
13270 13286 Label* l = $labl$$label;
13271 13287 emit_cc(cbuf, $primary, Assembler::parity);
13272 13288 int parity_disp = -1;
13273 13289 if ($cop$$cmpcode == Assembler::notEqual) {
13274 13290 parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13275 13291 } else if ($cop$$cmpcode == Assembler::equal) {
13276 13292 parity_disp = 2;
13277 13293 } else {
13278 13294 ShouldNotReachHere();
13279 13295 }
13280 13296 emit_d8(cbuf, parity_disp);
13281 13297 emit_cc(cbuf, $primary, $cop$$cmpcode);
13282 13298 int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13283 13299 emit_d8(cbuf, disp);
13284 13300 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
13285 13301 assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
13286 13302 %}
13287 13303 ins_pipe(pipe_jcc);
13288 13304 ins_pc_relative(1);
13289 13305 ins_short_branch(1);
13290 13306 %}
13291 13307
13292 13308 // ============================================================================
13293 13309 // Long Compare
13294 13310 //
13295 13311 // Currently we hold longs in 2 registers. Comparing such values efficiently
13296 13312 // is tricky. The flavor of compare used depends on whether we are testing
13297 13313 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
13298 13314 // The GE test is the negated LT test. The LE test can be had by commuting
13299 13315 // the operands (yielding a GE test) and then negating; negate again for the
13300 13316 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
13301 13317 // NE test is negated from that.
13302 13318
13303 13319 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13304 13320 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
13305 13321 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
13306 13322 // are collapsed internally in the ADLC's dfa-gen code. The match for
13307 13323 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13308 13324 // foo match ends up with the wrong leaf. One fix is to not match both
13309 13325 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
13310 13326 // both forms beat the trinary form of long-compare and both are very useful
13311 13327 // on Intel which has so few registers.
13312 13328
13313 13329 // Manifest a CmpL result in an integer register. Very painful.
13314 13330 // This is the test to avoid.
13315 13331 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13316 13332 match(Set dst (CmpL3 src1 src2));
13317 13333 effect( KILL flags );
13318 13334 ins_cost(1000);
13319 13335 format %{ "XOR $dst,$dst\n\t"
13320 13336 "CMP $src1.hi,$src2.hi\n\t"
13321 13337 "JLT,s m_one\n\t"
13322 13338 "JGT,s p_one\n\t"
13323 13339 "CMP $src1.lo,$src2.lo\n\t"
13324 13340 "JB,s m_one\n\t"
13325 13341 "JEQ,s done\n"
13326 13342 "p_one:\tINC $dst\n\t"
13327 13343 "JMP,s done\n"
13328 13344 "m_one:\tDEC $dst\n"
13329 13345 "done:" %}
13330 13346 ins_encode %{
13331 13347 Label p_one, m_one, done;
13332 13348 __ xorptr($dst$$Register, $dst$$Register);
13333 13349 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13334 13350 __ jccb(Assembler::less, m_one);
13335 13351 __ jccb(Assembler::greater, p_one);
13336 13352 __ cmpl($src1$$Register, $src2$$Register);
13337 13353 __ jccb(Assembler::below, m_one);
13338 13354 __ jccb(Assembler::equal, done);
13339 13355 __ bind(p_one);
13340 13356 __ incrementl($dst$$Register);
13341 13357 __ jmpb(done);
13342 13358 __ bind(m_one);
13343 13359 __ decrementl($dst$$Register);
13344 13360 __ bind(done);
13345 13361 %}
13346 13362 ins_pipe( pipe_slow );
13347 13363 %}
13348 13364
13349 13365 //======
13350 13366 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13351 13367 // compares. Can be used for LE or GT compares by reversing arguments.
13352 13368 // NOT GOOD FOR EQ/NE tests.
13353 13369 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13354 13370 match( Set flags (CmpL src zero ));
13355 13371 ins_cost(100);
13356 13372 format %{ "TEST $src.hi,$src.hi" %}
13357 13373 opcode(0x85);
13358 13374 ins_encode( OpcP, RegReg_Hi2( src, src ) );
13359 13375 ins_pipe( ialu_cr_reg_reg );
13360 13376 %}
13361 13377
13362 13378 // Manifest a CmpL result in the normal flags. Only good for LT or GE
13363 13379 // compares. Can be used for LE or GT compares by reversing arguments.
13364 13380 // NOT GOOD FOR EQ/NE tests.
13365 13381 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13366 13382 match( Set flags (CmpL src1 src2 ));
13367 13383 effect( TEMP tmp );
13368 13384 ins_cost(300);
13369 13385 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13370 13386 "MOV $tmp,$src1.hi\n\t"
13371 13387 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
13372 13388 ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13373 13389 ins_pipe( ialu_cr_reg_reg );
13374 13390 %}
13375 13391
13376 13392 // Long compares reg < zero/req OR reg >= zero/req.
13377 13393 // Just a wrapper for a normal branch, plus the predicate test.
13378 13394 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13379 13395 match(If cmp flags);
13380 13396 effect(USE labl);
13381 13397 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13382 13398 expand %{
13383 13399 jmpCon(cmp,flags,labl); // JLT or JGE...
13384 13400 %}
13385 13401 %}
13386 13402
13387 13403 // Compare 2 longs and CMOVE longs.
13388 13404 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13389 13405 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13390 13406 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13391 13407 ins_cost(400);
13392 13408 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13393 13409 "CMOV$cmp $dst.hi,$src.hi" %}
13394 13410 opcode(0x0F,0x40);
13395 13411 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13396 13412 ins_pipe( pipe_cmov_reg_long );
13397 13413 %}
13398 13414
13399 13415 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13400 13416 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13401 13417 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13402 13418 ins_cost(500);
13403 13419 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13404 13420 "CMOV$cmp $dst.hi,$src.hi" %}
13405 13421 opcode(0x0F,0x40);
13406 13422 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13407 13423 ins_pipe( pipe_cmov_reg_long );
13408 13424 %}
13409 13425
13410 13426 // Compare 2 longs and CMOVE ints.
13411 13427 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13412 13428 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13413 13429 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13414 13430 ins_cost(200);
13415 13431 format %{ "CMOV$cmp $dst,$src" %}
13416 13432 opcode(0x0F,0x40);
13417 13433 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13418 13434 ins_pipe( pipe_cmov_reg );
13419 13435 %}
13420 13436
13421 13437 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
13422 13438 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13423 13439 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13424 13440 ins_cost(250);
13425 13441 format %{ "CMOV$cmp $dst,$src" %}
13426 13442 opcode(0x0F,0x40);
13427 13443 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13428 13444 ins_pipe( pipe_cmov_mem );
13429 13445 %}
13430 13446
13431 13447 // Compare 2 longs and CMOVE ints.
13432 13448 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13433 13449 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13434 13450 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13435 13451 ins_cost(200);
13436 13452 format %{ "CMOV$cmp $dst,$src" %}
13437 13453 opcode(0x0F,0x40);
13438 13454 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13439 13455 ins_pipe( pipe_cmov_reg );
13440 13456 %}
13441 13457
13442 13458 // Compare 2 longs and CMOVE doubles
13443 13459 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13444 13460 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13445 13461 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13446 13462 ins_cost(200);
13447 13463 expand %{
13448 13464 fcmovD_regS(cmp,flags,dst,src);
13449 13465 %}
13450 13466 %}
13451 13467
13452 13468 // Compare 2 longs and CMOVE doubles
13453 13469 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
13454 13470 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13455 13471 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13456 13472 ins_cost(200);
13457 13473 expand %{
13458 13474 fcmovXD_regS(cmp,flags,dst,src);
13459 13475 %}
13460 13476 %}
13461 13477
13462 13478 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13463 13479 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13464 13480 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13465 13481 ins_cost(200);
13466 13482 expand %{
13467 13483 fcmovF_regS(cmp,flags,dst,src);
13468 13484 %}
13469 13485 %}
13470 13486
13471 13487 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
13472 13488 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13473 13489 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13474 13490 ins_cost(200);
13475 13491 expand %{
13476 13492 fcmovX_regS(cmp,flags,dst,src);
13477 13493 %}
13478 13494 %}
13479 13495
13480 13496 //======
13481 13497 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13482 13498 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13483 13499 match( Set flags (CmpL src zero ));
13484 13500 effect(TEMP tmp);
13485 13501 ins_cost(200);
13486 13502 format %{ "MOV $tmp,$src.lo\n\t"
13487 13503 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13488 13504 ins_encode( long_cmp_flags0( src, tmp ) );
13489 13505 ins_pipe( ialu_reg_reg_long );
13490 13506 %}
13491 13507
13492 13508 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13493 13509 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13494 13510 match( Set flags (CmpL src1 src2 ));
13495 13511 ins_cost(200+300);
13496 13512 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13497 13513 "JNE,s skip\n\t"
13498 13514 "CMP $src1.hi,$src2.hi\n\t"
13499 13515 "skip:\t" %}
13500 13516 ins_encode( long_cmp_flags1( src1, src2 ) );
13501 13517 ins_pipe( ialu_cr_reg_reg );
13502 13518 %}
13503 13519
13504 13520 // Long compare reg == zero/reg OR reg != zero/reg
13505 13521 // Just a wrapper for a normal branch, plus the predicate test.
13506 13522 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13507 13523 match(If cmp flags);
13508 13524 effect(USE labl);
13509 13525 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13510 13526 expand %{
13511 13527 jmpCon(cmp,flags,labl); // JEQ or JNE...
13512 13528 %}
13513 13529 %}
13514 13530
13515 13531 // Compare 2 longs and CMOVE longs.
13516 13532 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13517 13533 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13518 13534 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13519 13535 ins_cost(400);
13520 13536 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13521 13537 "CMOV$cmp $dst.hi,$src.hi" %}
13522 13538 opcode(0x0F,0x40);
13523 13539 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13524 13540 ins_pipe( pipe_cmov_reg_long );
13525 13541 %}
13526 13542
13527 13543 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13528 13544 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13529 13545 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13530 13546 ins_cost(500);
13531 13547 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13532 13548 "CMOV$cmp $dst.hi,$src.hi" %}
13533 13549 opcode(0x0F,0x40);
13534 13550 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13535 13551 ins_pipe( pipe_cmov_reg_long );
13536 13552 %}
13537 13553
13538 13554 // Compare 2 longs and CMOVE ints.
13539 13555 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13540 13556 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13541 13557 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13542 13558 ins_cost(200);
13543 13559 format %{ "CMOV$cmp $dst,$src" %}
13544 13560 opcode(0x0F,0x40);
13545 13561 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13546 13562 ins_pipe( pipe_cmov_reg );
13547 13563 %}
13548 13564
13549 13565 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
13550 13566 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13551 13567 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13552 13568 ins_cost(250);
13553 13569 format %{ "CMOV$cmp $dst,$src" %}
13554 13570 opcode(0x0F,0x40);
13555 13571 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13556 13572 ins_pipe( pipe_cmov_mem );
13557 13573 %}
13558 13574
13559 13575 // Compare 2 longs and CMOVE ints.
13560 13576 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13561 13577 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13562 13578 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13563 13579 ins_cost(200);
13564 13580 format %{ "CMOV$cmp $dst,$src" %}
13565 13581 opcode(0x0F,0x40);
13566 13582 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13567 13583 ins_pipe( pipe_cmov_reg );
13568 13584 %}
13569 13585
13570 13586 // Compare 2 longs and CMOVE doubles
13571 13587 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13572 13588 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13573 13589 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13574 13590 ins_cost(200);
13575 13591 expand %{
13576 13592 fcmovD_regS(cmp,flags,dst,src);
13577 13593 %}
13578 13594 %}
13579 13595
13580 13596 // Compare 2 longs and CMOVE doubles
13581 13597 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
13582 13598 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13583 13599 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13584 13600 ins_cost(200);
13585 13601 expand %{
13586 13602 fcmovXD_regS(cmp,flags,dst,src);
13587 13603 %}
13588 13604 %}
13589 13605
13590 13606 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13591 13607 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13592 13608 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13593 13609 ins_cost(200);
13594 13610 expand %{
13595 13611 fcmovF_regS(cmp,flags,dst,src);
13596 13612 %}
13597 13613 %}
13598 13614
// Compare 2 longs and CMOVE floats (SSE register form).
13599 13615 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=1 guard applies to
// both the 'eq' and 'ne' arms ('&&' binds tighter than '||' in C++).
13600 13616 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13601 13617 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13602 13618 ins_cost(200);
13603 13619 expand %{
13604 13620 fcmovX_regS(cmp,flags,dst,src);
13605 13621 %}
13606 13622 %}
13607 13623
13608 13624 //======
13609 13625 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13610 13626 // Same as cmpL_reg_flags_LEGT except must negate src
// The XOR/CMP/SBB sequence computes 0 - src across both 32-bit halves; only the
// resulting flags are consumed (tmp is a scratch register, see TEMP effect).
13611 13627 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13612 13628 match( Set flags (CmpL src zero ));
13613 13629 effect( TEMP tmp );
13614 13630 ins_cost(300);
13615 13631 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13616 13632 "CMP $tmp,$src.lo\n\t"
13617 13633 "SBB $tmp,$src.hi\n\t" %}
13618 13634 ins_encode( long_cmp_flags3(src, tmp) );
13619 13635 ins_pipe( ialu_reg_reg_long );
13620 13636 %}
13621 13637
13622 13638 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13623 13639 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13624 13640 // requires a commuted test to get the same result.
// tmp receives src2.hi - src1.hi - borrow; only the flags it produces matter.
13625 13641 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13626 13642 match( Set flags (CmpL src1 src2 ));
13627 13643 effect( TEMP tmp );
13628 13644 ins_cost(300);
13629 13645 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13630 13646 "MOV $tmp,$src2.hi\n\t"
13631 13647 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13632 13648 ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13633 13649 ins_pipe( ialu_cr_reg_reg );
13634 13650 %}
13635 13651
13636 13652 // Long compares reg < zero/reg OR reg >= zero/reg.
13637 13653 // Just a wrapper for a normal branch, plus the predicate test
// Only matches If nodes whose Bool test is GT or LE; expands to a plain
// conditional jump on the flags produced by the LEGT compare rules above.
13638 13654 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13639 13655 match(If cmp flags);
13640 13656 effect(USE labl);
13641 13657 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13642 13658 ins_cost(300);
13643 13659 expand %{
13644 13660 jmpCon(cmp,flags,labl); // JGT or JLE...
13645 13661 %}
13646 13662 %}
13647 13663
13648 13664 // Compare 2 longs and CMOVE longs.
// The 64-bit move is done as two 32-bit CMOVcc instructions (lo then hi half);
// requires hardware CMOV support and an LE/GT test.
13649 13665 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13650 13666 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13651 13667 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13652 13668 ins_cost(400);
13653 13669 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13654 13670 "CMOV$cmp $dst.hi,$src.hi" %}
13655 13671 opcode(0x0F,0x40);
13656 13672 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13657 13673 ins_pipe( pipe_cmov_reg_long );
13658 13674 %}
13659 13675
// Compare 2 longs and CMOVE a long loaded from memory (lo half, then hi half
// at offset +4); requires hardware CMOV support and an LE/GT test.
13660 13676 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13661 13677 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13662 13678 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13663 13679 ins_cost(500);
13664 13680 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13665 13681 "CMOV$cmp $dst.hi,$src.hi+4" %}
13666 13682 opcode(0x0F,0x40);
13667 13683 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13668 13684 ins_pipe( pipe_cmov_reg_long );
13669 13685 %}
13670 13686
13671 13687 // Compare 2 longs and CMOVE ints.
// Single 32-bit CMOVcc; requires hardware CMOV support and an LE/GT test.
13672 13688 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13673 13689 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13674 13690 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13675 13691 ins_cost(200);
13676 13692 format %{ "CMOV$cmp $dst,$src" %}
13677 13693 opcode(0x0F,0x40);
13678 13694 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13679 13695 ins_pipe( pipe_cmov_reg );
13680 13696 %}
13681 13697
// Compare 2 longs and CMOVE an int loaded from memory; requires hardware CMOV
// support and an LE/GT test.
13682 13698 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13683 13699 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13684 13700 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13685 13701 ins_cost(250);
13686 13702 format %{ "CMOV$cmp $dst,$src" %}
13687 13703 opcode(0x0F,0x40);
13688 13704 ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13689 13705 ins_pipe( pipe_cmov_mem );
13690 13706 %}
13691 13707
13692 13708 // Compare 2 longs and CMOVE ptrs.
// Same encoding as the int form (32-bit CMOVcc); pointers are 32 bits here.
13693 13709 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13694 13710 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13695 13711 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13696 13712 ins_cost(200);
13697 13713 format %{ "CMOV$cmp $dst,$src" %}
13698 13714 opcode(0x0F,0x40);
13699 13715 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13700 13716 ins_pipe( pipe_cmov_reg );
13701 13717 %}
13702 13718
13703 13719 // Compare 2 longs and CMOVE doubles
13704 13720 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
// FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||' in
// C++, so without parens the UseSSE<=1 guard covered only the 'le' arm and the
// rule could match a 'gt' test at any SSE level (same style as cmovPP_reg_LEGT).
13705 13721 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13706 13722 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13707 13723 ins_cost(200);
13708 13724 expand %{
13709 13725 fcmovD_regS(cmp,flags,dst,src);
13710 13726 %}
13711 13727 %}
13712 13728
13713 13729 // Compare 2 longs and CMOVE doubles
13714 13730 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=2 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13715 13731 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13716 13732 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13717 13733 ins_cost(200);
13718 13734 expand %{
13719 13735 fcmovXD_regS(cmp,flags,dst,src);
13720 13736 %}
13721 13737 %}
13722 13738
// Compare 2 longs and CMOVE floats (x87 form, no SSE).
13723 13739 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE==0 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13724 13740 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13725 13741 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13726 13742 ins_cost(200);
13727 13743 expand %{
13728 13744 fcmovF_regS(cmp,flags,dst,src);
13729 13745 %}
13730 13746 %}
13731 13747
13732 13748
// Compare 2 longs and CMOVE floats (SSE register form).
13733 13749 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
// FIX: parenthesize the BoolTest disjunction so the UseSSE>=1 guard applies to
// both the 'le' and 'gt' arms ('&&' binds tighter than '||' in C++).
13734 13750 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13735 13751 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13736 13752 ins_cost(200);
13737 13753 expand %{
13738 13754 fcmovX_regS(cmp,flags,dst,src);
13739 13755 %}
13740 13756 %}
13741 13757
13742 13758
13743 13759 // ============================================================================
13744 13760 // Procedure Call/Return Instructions
13745 13761 // Call Java Static Instruction
13746 13762 // Note: If this code changes, the corresponding ret_addr_offset() and
13747 13763 // compute_padding() functions will have to be adjusted.
// Plain static call; the method-handle variant is matched separately by
// CallStaticJavaHandle (see the is_method_handle_invoke predicate).
// NOTE(review): pre_call_FPU/post_call_FPU presumably manage x87 state around
// the call — their encodings are defined elsewhere in this file; confirm there.
13748 13764 instruct CallStaticJavaDirect(method meth) %{
13749 13765 match(CallStaticJava);
13750 13766 predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13751 13767 effect(USE meth);
13752 13768
13753 13769 ins_cost(300);
13754 13770 format %{ "CALL,static " %}
13755 13771 opcode(0xE8); /* E8 cd */
13756 13772 ins_encode( pre_call_FPU,
13757 13773 Java_Static_Call( meth ),
13758 13774 call_epilog,
13759 13775 post_call_FPU );
13760 13776 ins_pipe( pipe_slow );
13761 13777 ins_pc_relative(1);
13762 13778 ins_alignment(4);
13763 13779 %}
13764 13780
13765 13781 // Call Java Static Instruction (method handle version)
13766 13782 // Note: If this code changes, the corresponding ret_addr_offset() and
13767 13783 // compute_padding() functions will have to be adjusted.
// Differs from CallStaticJavaDirect by the preserve_SP/restore_SP pair in the
// encoding and by matching only method-handle invokes.
13768 13784 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13769 13785 match(CallStaticJava);
13770 13786 predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13771 13787 effect(USE meth);
13772 13788 // EBP is saved by all callees (for interpreter stack correction).
13773 13789 // We use it here for a similar purpose, in {preserve,restore}_SP.
13774 13790
13775 13791 ins_cost(300);
13776 13792 format %{ "CALL,static/MethodHandle " %}
13777 13793 opcode(0xE8); /* E8 cd */
13778 13794 ins_encode( pre_call_FPU,
13779 13795 preserve_SP,
13780 13796 Java_Static_Call( meth ),
13781 13797 restore_SP,
13782 13798 call_epilog,
13783 13799 post_call_FPU );
13784 13800 ins_pipe( pipe_slow );
13785 13801 ins_pc_relative(1);
13786 13802 ins_alignment(4);
13787 13803 %}
13788 13804
13789 13805 // Call Java Dynamic Instruction
13790 13806 // Note: If this code changes, the corresponding ret_addr_offset() and
13791 13807 // compute_padding() functions will have to be adjusted.
// Per the format string, EAX is loaded with a placeholder oop (-1) before the
// call; the Java_Dynamic_Call encoding emits the actual call sequence.
13792 13808 instruct CallDynamicJavaDirect(method meth) %{
13793 13809 match(CallDynamicJava);
13794 13810 effect(USE meth);
13795 13811
13796 13812 ins_cost(300);
13797 13813 format %{ "MOV EAX,(oop)-1\n\t"
13798 13814 "CALL,dynamic" %}
13799 13815 opcode(0xE8); /* E8 cd */
13800 13816 ins_encode( pre_call_FPU,
13801 13817 Java_Dynamic_Call( meth ),
13802 13818 call_epilog,
13803 13819 post_call_FPU );
13804 13820 ins_pipe( pipe_slow );
13805 13821 ins_pc_relative(1);
13806 13822 ins_alignment(4);
13807 13823 %}
13808 13824
13809 13825 // Call Runtime Instruction
// Unlike the leaf variants below, this call may reach a safepoint; the x87
// stack is cleared (FFree_Float_Stack_All) before entering the runtime.
13810 13826 instruct CallRuntimeDirect(method meth) %{
13811 13827 match(CallRuntime );
13812 13828 effect(USE meth);
13813 13829
13814 13830 ins_cost(300);
13815 13831 format %{ "CALL,runtime " %}
13816 13832 opcode(0xE8); /* E8 cd */
13817 13833 // Use FFREEs to clear entries in float stack
13818 13834 ins_encode( pre_call_FPU,
13819 13835 FFree_Float_Stack_All,
13820 13836 Java_To_Runtime( meth ),
13821 13837 post_call_FPU );
13822 13838 ins_pipe( pipe_slow );
13823 13839 ins_pc_relative(1);
13824 13840 %}
13825 13841
13826 13842 // Call runtime without safepoint
// Same encoding as CallRuntimeDirect plus Verify_FPU_For_Leaf — presumably a
// debug check that the leaf call left the FPU state intact; confirm at its
// definition elsewhere in this file.
13827 13843 instruct CallLeafDirect(method meth) %{
13828 13844 match(CallLeaf);
13829 13845 effect(USE meth);
13830 13846
13831 13847 ins_cost(300);
13832 13848 format %{ "CALL_LEAF,runtime " %}
13833 13849 opcode(0xE8); /* E8 cd */
13834 13850 ins_encode( pre_call_FPU,
13835 13851 FFree_Float_Stack_All,
13836 13852 Java_To_Runtime( meth ),
13837 13853 Verify_FPU_For_Leaf, post_call_FPU );
13838 13854 ins_pipe( pipe_slow );
13839 13855 ins_pc_relative(1);
13840 13856 %}
13841 13857
// Leaf runtime call that does not touch FP state: no pre/post_call_FPU and no
// float-stack cleanup, just the direct call encoding.
13842 13858 instruct CallLeafNoFPDirect(method meth) %{
13843 13859 match(CallLeafNoFP);
13844 13860 effect(USE meth);
13845 13861
13846 13862 ins_cost(300);
13847 13863 format %{ "CALL_LEAF_NOFP,runtime " %}
13848 13864 opcode(0xE8); /* E8 cd */
13849 13865 ins_encode(Java_To_Runtime(meth));
13850 13866 ins_pipe( pipe_slow );
13851 13867 ins_pc_relative(1);
13852 13868 %}
13853 13869
13854 13870
13855 13871 // Return Instruction
13856 13872 // Remove the return address & jump to it.
// Single-byte RET (0xC3).
13857 13873 instruct Ret() %{
13858 13874 match(Return);
13859 13875 format %{ "RET" %}
13860 13876 opcode(0xC3);
13861 13877 ins_encode(OpcP);
13862 13878 ins_pipe( pipe_jmp );
13863 13879 %}
13864 13880
13865 13881 // Tail Call; Jump from runtime stub to Java code.
13866 13882 // Also known as an 'interprocedural jump'.
13867 13883 // Target of jump will eventually return to caller.
13868 13884 // TailJump below removes the return address.
// Indirect JMP (FF /4) through jump_target; EBX carries the method oop per the
// format string. The caller's return address stays on the stack.
13869 13885 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13870 13886 match(TailCall jump_target method_oop );
13871 13887 ins_cost(300);
13872 13888 format %{ "JMP $jump_target \t# EBX holds method oop" %}
13873 13889 opcode(0xFF, 0x4); /* Opcode FF /4 */
13874 13890 ins_encode( OpcP, RegOpc(jump_target) );
13875 13891 ins_pipe( pipe_jmp );
13876 13892 %}
13877 13893
13878 13894
13879 13895 // Tail Jump; remove the return address; jump to target.
13880 13896 // TailCall above leaves the return address around.
// POPs the return address into EDX (discarded) before the indirect JMP, so the
// target sees the caller's caller as its return point. EAX holds the exception oop.
13881 13897 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13882 13898 match( TailJump jump_target ex_oop );
13883 13899 ins_cost(300);
13884 13900 format %{ "POP EDX\t# pop return address into dummy\n\t"
13885 13901 "JMP $jump_target " %}
13886 13902 opcode(0xFF, 0x4); /* Opcode FF /4 */
13887 13903 ins_encode( enc_pop_rdx,
13888 13904 OpcP, RegOpc(jump_target) );
13889 13905 ins_pipe( pipe_jmp );
13890 13906 %}
13891 13907
13892 13908 // Create exception oop: created by stack-crawling runtime code.
13893 13909 // Created exception is now available to this handler, and is setup
13894 13910 // just prior to jumping to this handler. No code emitted.
// Zero-size marker instruction: it only tells the register allocator the
// exception oop is already in EAX.
13895 13911 instruct CreateException( eAXRegP ex_oop )
13896 13912 %{
13897 13913 match(Set ex_oop (CreateEx));
13898 13914
13899 13915 size(0);
13900 13916 // use the following format syntax
13901 13917 format %{ "# exception oop is in EAX; no code emitted" %}
13902 13918 ins_encode();
13903 13919 ins_pipe( empty );
13904 13920 %}
13905 13921
13906 13922
13907 13923 // Rethrow exception:
13908 13924 // The exception oop will come in the first argument position.
13909 13925 // Then JUMP (not call) to the rethrow stub code.
// Entire encoding is delegated to enc_rethrow (defined elsewhere in this file).
13910 13926 instruct RethrowException()
13911 13927 %{
13912 13928 match(Rethrow);
13913 13929
13914 13930 // use the following format syntax
13915 13931 format %{ "JMP rethrow_stub" %}
13916 13932 ins_encode(enc_rethrow);
13917 13933 ins_pipe( pipe_jmp );
13918 13934 %}
13919 13935
13920 13936 // inlined locking and unlocking
13921 13937
13922 13938
// Inlined object lock: sets the flags register from the FastLock attempt.
// tmp (EAX) and scr are scratch registers clobbered by the Fast_Lock encoding.
13923 13939 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13924 13940 match( Set cr (FastLock object box) );
13925 13941 effect( TEMP tmp, TEMP scr );
13926 13942 ins_cost(300);
13927 13943 format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13928 13944 ins_encode( Fast_Lock(object,box,tmp,scr) );
13929 13945 ins_pipe( pipe_slow );
13930 13946 ins_pc_relative(1);
13931 13947 %}
13932 13948
// Inlined object unlock: sets the flags register from the FastUnlock attempt.
// box is pinned to EAX; tmp is a scratch register clobbered by Fast_Unlock.
13933 13949 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13934 13950 match( Set cr (FastUnlock object box) );
13935 13951 effect( TEMP tmp );
13936 13952 ins_cost(300);
13937 13953 format %{ "FASTUNLOCK $object, $box, $tmp" %}
13938 13954 ins_encode( Fast_Unlock(object,box,tmp) );
13939 13955 ins_pipe( pipe_slow );
13940 13956 ins_pc_relative(1);
13941 13957 %}
13942 13958
13943 13959
13944 13960
13945 13961 // ============================================================================
13946 13962 // Safepoint Instruction
// Per the format string, a TSTL against the polling page address is emitted;
// kills the flags. NOTE(review): reaching a safepoint presumably relies on the
// poll access trapping when the page is protected — confirm in Safepoint_Poll.
13947 13963 instruct safePoint_poll(eFlagsReg cr) %{
13948 13964 match(SafePoint);
13949 13965 effect(KILL cr);
13950 13966
13951 13967 // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13952 13968 // On SPARC that might be acceptable as we can generate the address with
13953 13969 // just a sethi, saving an or. By polling at offset 0 we can end up
13954 13970 // putting additional pressure on the index-0 in the D$. Because of
13955 13971 // alignment (just like the situation at hand) the lower indices tend
13956 13972 // to see more traffic. It'd be better to change the polling address
13957 13973 // to offset 0 of the last $line in the polling page.
13958 13974
13959 13975 format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
13960 13976 ins_cost(125);
13961 13977 size(6) ;
13962 13978 ins_encode( Safepoint_Poll() );
13963 13979 ins_pipe( ialu_reg_mem );
13964 13980 %}
13965 13981
13966 13982 //----------PEEPHOLE RULES-----------------------------------------------------
13967 13983 // These must follow all instruction definitions as they use the names
13968 13984 // defined in the instructions definitions.
13969 13985 //
13970 13986 // peepmatch ( root_instr_name [preceding_instruction]* );
13971 13987 //
13972 13988 // peepconstraint %{
13973 13989 // (instruction_number.operand_name relational_op instruction_number.operand_name
13974 13990 // [, ...] );
13975 13991 // // instruction numbers are zero-based using left to right order in peepmatch
13976 13992 //
13977 13993 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13978 13994 // // provide an instruction_number.operand_name for each operand that appears
13979 13995 // // in the replacement instruction's match rule
13980 13996 //
13981 13997 // ---------VM FLAGS---------------------------------------------------------
13982 13998 //
13983 13999 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13984 14000 //
13985 14001 // Each peephole rule is given an identifying number starting with zero and
13986 14002 // increasing by one in the order seen by the parser. An individual peephole
13987 14003 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13988 14004 // on the command-line.
13989 14005 //
13990 14006 // ---------CURRENT LIMITATIONS----------------------------------------------
13991 14007 //
13992 14008 // Only match adjacent instructions in same basic block
13993 14009 // Only equality constraints
13994 14010 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13995 14011 // Only one replacement instruction
13996 14012 //
13997 14013 // ---------EXAMPLE----------------------------------------------------------
13998 14014 //
13999 14015 // // pertinent parts of existing instructions in architecture description
14000 14016 // instruct movI(eRegI dst, eRegI src) %{
14001 14017 // match(Set dst (CopyI src));
14002 14018 // %}
14003 14019 //
14004 14020 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14005 14021 // match(Set dst (AddI dst src));
14006 14022 // effect(KILL cr);
14007 14023 // %}
14008 14024 //
14009 14025 // // Change (inc mov) to lea
14010 14026 // peephole %{
14011 14027 // // increment preceded by register-register move
14012 14028 // peepmatch ( incI_eReg movI );
14013 14029 // // require that the destination register of the increment
14014 14030 // // match the destination register of the move
14015 14031 // peepconstraint ( 0.dst == 1.dst );
14016 14032 // // construct a replacement instruction that sets
14017 14033 // // the destination to ( move's source register + one )
14018 14034 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14019 14035 // %}
14020 14036 //
14021 14037 // Implementation no longer uses movX instructions since
14022 14038 // machine-independent system no longer uses CopyX nodes.
14023 14039 //
14024 14040 // peephole %{
14025 14041 // peepmatch ( incI_eReg movI );
14026 14042 // peepconstraint ( 0.dst == 1.dst );
14027 14043 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14028 14044 // %}
14029 14045 //
14030 14046 // peephole %{
14031 14047 // peepmatch ( decI_eReg movI );
14032 14048 // peepconstraint ( 0.dst == 1.dst );
14033 14049 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14034 14050 // %}
14035 14051 //
14036 14052 // peephole %{
14037 14053 // peepmatch ( addI_eReg_imm movI );
14038 14054 // peepconstraint ( 0.dst == 1.dst );
14039 14055 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14040 14056 // %}
14041 14057 //
14042 14058 // peephole %{
14043 14059 // peepmatch ( addP_eReg_imm movP );
14044 14060 // peepconstraint ( 0.dst == 1.dst );
14045 14061 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14046 14062 // %}
14047 14063
14048 14064 // // Change load of spilled value to only a spill
14049 14065 // instruct storeI(memory mem, eRegI src) %{
14050 14066 // match(Set mem (StoreI mem src));
14051 14067 // %}
14052 14068 //
14053 14069 // instruct loadI(eRegI dst, memory mem) %{
14054 14070 // match(Set dst (LoadI mem));
14055 14071 // %}
14056 14072 //
// Peephole: a loadI immediately following a storeI of the same value to the
// same address is redundant — replace the pair with just the store.
14057 14073 peephole %{
14058 14074 peepmatch ( loadI storeI );
14059 14075 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14060 14076 peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14061 14077 %}
14062 14078
14063 14079 //----------SMARTSPILL RULES---------------------------------------------------
14064 14080 // These must follow all instruction definitions as they use the names
14065 14081 // defined in the instructions definitions.
↓ open down ↓ |
1104 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX