6961690 Wdiff src/cpu/x86/vm/assembler_x86.hpp

Print this page

rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:

Split	Close
Expand all
Collapse all

          --- old/src/cpu/x86/vm/assembler_x86.hpp
          +++ new/src/cpu/x86/vm/assembler_x86.hpp

   1    1  /*
   2    2   * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
   3    3   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4    4   *
   5    5   * This code is free software; you can redistribute it and/or modify it
   6    6   * under the terms of the GNU General Public License version 2 only, as
   7    7   * published by the Free Software Foundation.
   8    8   *
   9    9   * This code is distributed in the hope that it will be useful, but WITHOUT
  10   10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11   11   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12   12   * version 2 for more details (a copy is included in the LICENSE file that
  13   13   * accompanied this code).
  14   14   *
  15   15   * You should have received a copy of the GNU General Public License version
  16   16   * 2 along with this work; if not, write to the Free Software Foundation,
  17   17   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18   18   *
  19   19   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20   20   * or visit www.oracle.com if you need additional information or have any
  21   21   * questions.
  22   22   *
  23   23   */
  24   24  
  25   25  class BiasedLockingCounters;
  26   26  
  27   27  // Contains all the definitions needed for x86 assembly code generation.
  28   28  
  29   29  // Calling convention
  30   30  class Argument VALUE_OBJ_CLASS_SPEC {
             // Holds, as anonymous enum constants, how many arguments each calling
             // convention passes in registers. The set of constants that exists
             // depends on _LP64 and _WIN64.
  31   31   public:
  32   32    enum {
  33   33  #ifdef _LP64
  34   34  #ifdef _WIN64
               // 64-bit Windows C convention: 4 integer and 4 float registers.
  35   35      n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
  36   36      n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
  37   37  #else
               // Other 64-bit C convention: 6 integer and 8 float registers.
  38   38      n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
  39   39      n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
  40   40  #endif // _WIN64
               // The Java convention counts are outside the _WIN64 conditional, so
               // they are the same on every 64-bit platform.
  41   41      n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
  42   42      n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
  43   43  #else
               // Without _LP64 only this single constant is defined; the *_c / *_j
               // constants above do not exist in that configuration.
  44   44      n_register_parameters = 0   // 0 registers used to pass arguments
  45   45  #endif // _LP64
  46   46    };
  47   47  };
  48   48  
  49   49  
  50   50  #ifdef _LP64
  51   51  // Symbolically name the register arguments used by the c calling convention.
  52   52  // Windows is different from linux/solaris. So much for standards...
  53   53  
  54   54  #ifdef _WIN64
  55   55  
           // Win64: four integer argument registers (c_rarg0..c_rarg3) and four
           // XMM argument registers (c_farg0..c_farg3). c_rarg4/c_rarg5 and
           // c_farg4..c_farg7 are NOT declared in this configuration.
  56   56  REGISTER_DECLARATION(Register, c_rarg0, rcx);
  57   57  REGISTER_DECLARATION(Register, c_rarg1, rdx);
  58   58  REGISTER_DECLARATION(Register, c_rarg2, r8);
  59   59  REGISTER_DECLARATION(Register, c_rarg3, r9);
  60   60  
  61   61  REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
  62   62  REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
  63   63  REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
  64   64  REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
  65   65  
  66   66  #else
  67   67  
           // Non-Windows 64-bit: six integer argument registers (c_rarg0..c_rarg5)
           // and eight XMM argument registers (c_farg0..c_farg7).
  68   68  REGISTER_DECLARATION(Register, c_rarg0, rdi);
  69   69  REGISTER_DECLARATION(Register, c_rarg1, rsi);
  70   70  REGISTER_DECLARATION(Register, c_rarg2, rdx);
  71   71  REGISTER_DECLARATION(Register, c_rarg3, rcx);
  72   72  REGISTER_DECLARATION(Register, c_rarg4, r8);
  73   73  REGISTER_DECLARATION(Register, c_rarg5, r9);
  74   74  
  75   75  REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
  76   76  REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
  77   77  REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
  78   78  REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
  79   79  REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
  80   80  REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
  81   81  REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
  82   82  REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
  83   83  
  84   84  #endif // _WIN64
  85   85  
  86   86  // Symbolically name the register arguments used by the Java calling convention.
  87   87  // We have control over the convention for java so we can do what we please.
  88   88  // What pleases us is to offset the java calling convention so that when
  89   89  // we call a suitable jni method the arguments are lined up and we don't
  90   90  // have to do little shuffling. A suitable jni method is non-static and a
  91   91  // small number of arguments (two fewer args on windows)
  92   92  //
  93   93  //        |-------------------------------------------------------|
  94   94  //        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
  95   95  //        |-------------------------------------------------------|
  96   96  //        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
  97   97  //        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
  98   98  //        |-------------------------------------------------------|
  99   99  //        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
 100  100  //        |-------------------------------------------------------|
 101  101  
 102  102  REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
           // j_rarg0..j_rarg2 alias c_rarg1..c_rarg3: the Java integer argument
           // registers are the C ones shifted by one position.
 103  103  REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
 104  104  REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
 105  105  // Windows runs out of register args here
 106  106  #ifdef _WIN64
           // No c_rarg4/c_rarg5 exist under _WIN64, so rdi and rsi are named directly.
 107  107  REGISTER_DECLARATION(Register, j_rarg3, rdi);
 108  108  REGISTER_DECLARATION(Register, j_rarg4, rsi);
 109  109  #else
 110  110  REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
 111  111  REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
 112  112  #endif /* _WIN64 */
           // The last Java integer argument register wraps around to c_rarg0.
 113  113  REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
 114  114  
           // Java float argument registers are xmm0..xmm7; these declarations are
           // outside the _WIN64 conditional and so apply to every 64-bit platform.
 115  115  REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
 116  116  REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
 117  117  REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
 118  118  REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
 119  119  REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
 120  120  REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
 121  121  REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
 122  122  REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
 123  123  
           // Symbolic names for r10/r11, used as scratch registers.
 124  124  REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile
 125  125  REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile
 126  126  
           // Symbolic names for r12 and r15; the names suggest heap base and
           // current thread respectively -- NOTE(review): confirm against users.
 127  127  REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
 128  128  REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
 129  129  
 130  130  #else
 131  131  // rscratch1 will appear in 32bit code that is dead but of course must compile
 132  132  // Using noreg ensures if the dead code is incorrectly live and executed it
 133  133  // will cause an assertion failure
 134  134  #define rscratch1 noreg
 135  135  
 136  136  #endif // _LP64
 137  137  
 138  138  // JSR 292 fixed register usages:
 139  139  REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
 140  140  
 141  141  // Address is an abstraction used to represent a memory location
 142  142  // using any of the amd64 addressing modes with one object.
 143  143  //
 144  144  // Note: A register location is represented via a Register, not
 145  145  //       via an address for efficiency & simplicity reasons.
 146  146  
 147  147  class ArrayAddress;
 148  148  
 149  149  class Address VALUE_OBJ_CLASS_SPEC {
             // Value type describing a memory operand as a base register, an
             // optional scaled index register and an integer displacement, together
             // with a RelocationHolder (_rspec).
 150  150   public:
             // Index scaling factor. The non-negative enumerators are the log2 of
             // the multiplier (scale_size() returns 1 << scale). no_scale is the
             // value the constructor asserts expect when there is no index register.
 151  151    enum ScaleFactor {
 152  152      no_scale = -1,
 153  153      times_1  =  0,
 154  154      times_2  =  1,
 155  155      times_4  =  2,
 156  156      times_8  =  3,
 157  157      times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
 158  158    };
             // Maps an element size in bytes (1, 2, 4 or 8) to the matching
             // ScaleFactor. Other sizes trip the assert; if the assert is compiled
             // out, any size other than 8, 4 or 2 yields times_1.
 159  159    static ScaleFactor times(int size) {
 160  160      assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
 161  161      if (size == 8)  return times_8;
 162  162      if (size == 4)  return times_4;
 163  163      if (size == 2)  return times_2;
 164  164      return times_1;
 165  165    }
             // Inverse of times(): returns the multiplier (1 << scale) for a
             // ScaleFactor. Must not be called with no_scale. The second assert
             // only re-checks that the enumerator values are the expected log2s.
 166  166    static int scale_size(ScaleFactor scale) {
 167  167      assert(scale != no_scale, "");
 168  168      assert(((1 << (int)times_1) == 1 &&
 169  169              (1 << (int)times_2) == 2 &&
 170  170              (1 << (int)times_4) == 4 &&
 171  171              (1 << (int)times_8) == 8), "");
 172  172      return (1 << (int)scale);
 173  173    }
 174  174  
 175  175   private:
             // Components of the operand; noreg / no_scale mean "absent".
 176  176    Register         _base;
 177  177    Register         _index;
 178  178    ScaleFactor      _scale;
 179  179    int              _disp;
 180  180    RelocationHolder _rspec;
 181  181  
 182  182    // Easily misused constructors make them private
 183  183    // %%% can we make these go away?
 184  184    NOT_LP64(Address(address loc, RelocationHolder spec);)
 185  185    Address(int disp, address loc, relocInfo::relocType rtype);
 186  186    Address(int disp, address loc, RelocationHolder spec);
 187  187  
 188  188   public:
 189  189  
             // Non-const overload of disp(); a const accessor with the same body is
             // declared further down in this class.
 190  190   int disp() { return _disp; }
 191  191    // creation
             // Default: no base, no index, no scale, zero displacement. _rspec is
             // not named in the initializer list.
 192  192    Address()
 193  193      : _base(noreg),
 194  194        _index(noreg),
 195  195        _scale(no_scale),
 196  196        _disp(0) {
 197  197    }
 198  198  
 199  199    // No default displacement otherwise Register can be implicitly
 200  200    // converted to 0(Register) which is quite a different animal.
 201  201  
             // base + disp, without an index register.
 202  202    Address(Register base, int disp)
 203  203      : _base(base),
 204  204        _index(noreg),
 205  205        _scale(no_scale),
 206  206        _disp(disp) {
 207  207    }
 208  208  
             // base + index*scale + disp. The assert requires that scale is
             // no_scale exactly when index is not a valid register.
 209  209    Address(Register base, Register index, ScaleFactor scale, int disp = 0)
 210  210      : _base (base),
 211  211        _index(index),
 212  212        _scale(scale),
 213  213        _disp (disp) {
 214  214      assert(!index->is_valid() == (scale == Address::no_scale),
 215  215             "inconsistent address");
 216  216    }
 217  217  
             // Index may be a register or a constant. A constant index is folded
             // into the displacement (constant * scale_size(scale)) and _index
             // becomes whatever register_or_noreg() returns.
             // Note: scale_size(scale) is evaluated in the initializer list, so
             // passing no_scale here trips its assert.
             // Note: the statement below resets only the local parameter `scale`
             // so the assert passes for a constant index; the _scale member keeps
             // the value that was passed in.
 218  218    Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
 219  219      : _base (base),
 220  220        _index(index.register_or_noreg()),
 221  221        _scale(scale),
 222  222        _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
 223  223      if (!index.is_register())  scale = Address::no_scale;
 224  224      assert(!_index->is_valid() == (scale == Address::no_scale),
 225  225             "inconsistent address");
 226  226    }
 227  227  
             // Returns a copy of this Address with disp added to its displacement;
             // the receiver is not modified.
 228  228    Address plus_disp(int disp) const {
 229  229      Address a = (*this);
 230  230      a._disp += disp;
 231  231      return a;
 232  232    }
 233  233  
 234  234    // The following two overloads are used in connection with the
 235  235    // ByteSize type (see sizes.hpp).  They simplify the use of
 236  236    // ByteSize'd arguments in assembly code. Note that their equivalent
 237  237    // for the optimized build are the member functions with int disp
 238  238    // argument since ByteSize is mapped to an int type in that case.
 239  239    //
 240  240    // Note: DO NOT introduce similar overloaded functions for WordSize
 241  241    // arguments as in the optimized mode, both ByteSize and WordSize
 242  242    // are mapped to the same type and thus the compiler cannot make a
 243  243    // distinction anymore (=> compiler errors).
 244  244  
 245  245  #ifdef ASSERT
             // ByteSize counterparts of the int-displacement constructors above;
             // each converts the displacement with in_bytes().
 246  246    Address(Register base, ByteSize disp)
 247  247      : _base(base),
 248  248        _index(noreg),
 249  249        _scale(no_scale),
 250  250        _disp(in_bytes(disp)) {
 251  251    }
 252  252  
 253  253    Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
 254  254      : _base(base),
 255  255        _index(index),
 256  256        _scale(scale),
 257  257        _disp(in_bytes(disp)) {
 258  258      assert(!index->is_valid() == (scale == Address::no_scale),
 259  259             "inconsistent address");
 260  260    }
 261  261  
             // As in the int-displacement RegisterOrConstant constructor, only the
             // local `scale` is reset before the assert; _scale keeps the argument.
 262  262    Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
 263  263      : _base (base),
 264  264        _index(index.register_or_noreg()),
 265  265        _scale(scale),
 266  266        _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
 267  267      if (!index.is_register())  scale = Address::no_scale;
 268  268      assert(!_index->is_valid() == (scale == Address::no_scale),
 269  269             "inconsistent address");
 270  270    }
 271  271  
 272  272  #endif // ASSERT
 273  273  
 274  274    // accessors
             // uses(): true when reg is either the base or the index register.
 275  275    bool        uses(Register reg) const { return _base == reg || _index == reg; }
 276  276    Register    base()             const { return _base;  }
 277  277    Register    index()            const { return _index; }
 278  278    ScaleFactor scale()            const { return _scale; }
 279  279    int         disp()             const { return _disp;  }
 280  280  
 281  281    // Convert the raw encoding form into the form expected by the constructor for
 282  282    // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 283  283    // that to noreg for the Address constructor.
 284  284    static Address make_raw(int base, int index, int scale, int disp, bool disp_is_oop);
 285  285  
             // Declared only; produces an Address from an ArrayAddress.
 286  286    static Address make_array(ArrayAddress);
 287  287  
 288  288   private:
             // True when a base register is present and its encoding is >= 8.
 289  289    bool base_needs_rex() const {
 290  290      return _base != noreg && _base->encoding() >= 8;
 291  291    }
 292  292  
             // True when an index register is present and its encoding is >= 8.
 293  293    bool index_needs_rex() const {
 294  294      return _index != noreg &&_index->encoding() >= 8;
 295  295    }
 296  296  
             // Relocation type recorded in _rspec.
 297  297    relocInfo::relocType reloc() const { return _rspec.type(); }
 298  298  
             // These classes get access to the private members above.
 299  299    friend class Assembler;
 300  300    friend class MacroAssembler;
 301  301    friend class LIR_Assembler; // base/index/scale/disp
 302  302  };
 303  303  
 304  304  //
 305  305  // AddressLiteral has been split out from Address because operands of this type
 306  306  // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 307  307  // the few instructions that need to deal with address literals are unique and the
 308  308  // MacroAssembler does not have to implement every instruction in the Assembler
 309  309  // in order to search for address literals that may need special handling depending
 310  310  // on the instruction and the platform. A small step on the way to merging i486/amd64
 311  311  // directories.
 312  312  //
 313  313  class AddressLiteral VALUE_OBJ_CLASS_SPEC {
             // Value type pairing a target address with a RelocationHolder and an
             // rval/lval flag. Everything except the constructors and addr() is
             // private and reached through the friend declarations at the bottom.
 314  314    friend class ArrayAddress;
 315  315    RelocationHolder _rspec;
 316  316    // Typically when we use AddressLiterals we want to use their rval.
 317  317    // However in some situations we want the lval (effective address) of the item.
 318  318    // We provide a special factory for making those lvals.
 319  319    bool _is_lval;
 320  320  
 321  321    // If the target is far we'll need to load the ea of this to
 322  322    // a register to reach it. Otherwise if near we can do rip
 323  323    // relative addressing.
 324  324  
 325  325    address          _target;
 326  326  
 327  327   protected:
 328  328    // creation
             // Protected default: rval, NULL target; _rspec is not named in the
             // initializer list.
 329  329    AddressLiteral()
 330  330      : _is_lval(false),
 331  331        _target(NULL)
 332  332    {}
 333  333  
 334  334    public:
 335  335  
 336  336  
             // Declared only; takes a relocation type instead of a RelocationHolder.
 337  337    AddressLiteral(address target, relocInfo::relocType rtype);
 338  338  
             // Stores target and rspec as given; the literal starts out as an rval.
 339  339    AddressLiteral(address target, RelocationHolder const& rspec)
 340  340      : _rspec(rspec),
 341  341        _is_lval(false),
 342  342        _target(target)
 343  343    {}
 344  344  
             // The lval factory: returns a copy of this literal with _is_lval set
             // to true. The receiver itself is left unchanged.
 345  345    AddressLiteral addr() {
 346  346      AddressLiteral ret = *this;
 347  347      ret._is_lval = true;
 348  348      return ret;
 349  349    }
 350  350  
 351  351  
 352  352   private:
 353  353  
             // Private accessors. Note that target() and is_lval() are not
             // const-qualified, unlike reloc() and rspec().
 354  354    address target() { return _target; }
 355  355    bool is_lval() { return _is_lval; }
 356  356  
 357  357    relocInfo::relocType reloc() const { return _rspec.type(); }
 358  358    const RelocationHolder& rspec() const { return _rspec; }
 359  359  
 360  360    friend class Assembler;
 361  361    friend class MacroAssembler;
 362  362    friend class Address;
 363  363    friend class LIR_Assembler;
 364  364  };
 365  365  
 366  366  // Convenience classes
 367  367  class RuntimeAddress: public AddressLiteral {
             // AddressLiteral constructed with relocInfo::runtime_call_type.
 368  368  
 369  369    public:
 370  370  
 371  371    RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
 372  372  
 373  373  };
 374  374  
 375  375  class OopAddress: public AddressLiteral {
             // AddressLiteral constructed with relocInfo::oop_type.
 376  376  
 377  377    public:
 378  378  
 379  379    OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){}
 380  380  
 381  381  };
 382  382  
 383  383  class ExternalAddress: public AddressLiteral {
             // AddressLiteral constructed with relocInfo::external_word_type.
 384  384  
 385  385    public:
 386  386  
 387  387    ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){}
 388  388  
 389  389  };
 390  390  
 391  391  class InternalAddress: public AddressLiteral {
             // AddressLiteral constructed with relocInfo::internal_word_type.
 392  392  
 393  393    public:
 394  394  
 395  395    InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
 396  396  
 397  397  };
 398  398  
 399  399  // x86 can do array addressing as a single operation since disp can be an absolute
 400  400  // address; amd64 can't. We create a class that expresses the concept but does extra
 401  401  // magic on amd64 to get the final result.
 402  402  
 403  403  class ArrayAddress VALUE_OBJ_CLASS_SPEC {
             // Pairs an AddressLiteral (the array base) with an Address (the index
             // part). This class only stores the two pieces.
 404  404    private:
 405  405  
 406  406    AddressLiteral _base;
 407  407    Address        _index;
 408  408  
 409  409    public:
 410  410  
             // Default construction relies on AddressLiteral's protected default
             // constructor, which is reachable because ArrayAddress is its friend.
 411  411    ArrayAddress() {};
 412  412    ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
             // Both accessors return copies, not references.
 413  413    AddressLiteral base() { return _base; }
 414  414    Address index() { return _index; }
 415  415  
 416  416  };
 417  417  
 418  418  const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);  // 27 in the NOT_LP64 build, 512 / wordSize in the LP64 build (presumably a 512-byte save area -- TODO confirm)
 419  419  
 420  420  // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
 421  421  // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
 422  422  // is what you get. The Assembler is generating code into a CodeBuffer.
 423  423  
 424  424  class Assembler : public AbstractAssembler  {
 425  425    friend class AbstractAssembler; // for the non-virtual hack
 426  426    friend class LIR_Assembler; // as_Address()
 427  427    friend class StubGenerator;
 428  428  
 429  429   public:
 430  430    enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
 431  431      zero          = 0x4,
 432  432      notZero       = 0x5,
 433  433      equal         = 0x4,
 434  434      notEqual      = 0x5,
 435  435      less          = 0xc,
 436  436      lessEqual     = 0xe,
 437  437      greater       = 0xf,
 438  438      greaterEqual  = 0xd,
 439  439      below         = 0x2,
 440  440      belowEqual    = 0x6,
 441  441      above         = 0x7,
 442  442      aboveEqual    = 0x3,
 443  443      overflow      = 0x0,
 444  444      noOverflow    = 0x1,
 445  445      carrySet      = 0x2,
 446  446      carryClear    = 0x3,
 447  447      negative      = 0x8,
 448  448      positive      = 0x9,
 449  449      parity        = 0xa,
 450  450      noParity      = 0xb
 451  451    };
 452  452  
 453  453    enum Prefix {
 454  454      // segment overrides
 455  455      CS_segment = 0x2e,
 456  456      SS_segment = 0x36,
 457  457      DS_segment = 0x3e,
 458  458      ES_segment = 0x26,
 459  459      FS_segment = 0x64,
 460  460      GS_segment = 0x65,
 461  461  
 462  462      REX        = 0x40,
 463  463  
 464  464      REX_B      = 0x41,
 465  465      REX_X      = 0x42,
 466  466      REX_XB     = 0x43,
 467  467      REX_R      = 0x44,
 468  468      REX_RB     = 0x45,
 469  469      REX_RX     = 0x46,
 470  470      REX_RXB    = 0x47,
 471  471  
 472  472      REX_W      = 0x48,
 473  473  
 474  474      REX_WB     = 0x49,
 475  475      REX_WX     = 0x4A,
 476  476      REX_WXB    = 0x4B,
 477  477      REX_WR     = 0x4C,
 478  478      REX_WRB    = 0x4D,
 479  479      REX_WRX    = 0x4E,
 480  480      REX_WRXB   = 0x4F
 481  481    };
 482  482  
 483  483    enum WhichOperand {
 484  484      // input to locate_operand, and format code for relocations
 485  485      imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
 486  486      disp32_operand = 1,          // embedded 32-bit displacement or address
 487  487      call32_operand = 2,          // embedded 32-bit self-relative displacement
 488  488  #ifndef _LP64
 489  489      _WhichOperand_limit = 3
 490  490  #else
 491  491       narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
 492  492      _WhichOperand_limit = 4
 493  493  #endif
 494  494    };
 495  495  
 496  496  
 497  497  
 498  498    // NOTE: The general philosophy of the declarations here is that 64bit versions
 499  499    // of instructions are freely declared without the need for wrapping them in an ifdef.
 500  500    // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
 501  501    // In the .cpp file the implementations are wrapped so that they are dropped out
 502  502    // of the resulting jvm. This is done mostly to keep the footprint of KERNEL
 503  503    // to the size it was prior to merging up the 32bit and 64bit assemblers.
 504  504    //
 505  505    // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
 506  506    // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
 507  507  
 508  508  private:
 509  509  
 510  510  
 511  511    // 64bit prefixes
 512  512    int prefix_and_encode(int reg_enc, bool byteinst = false);
 513  513    int prefixq_and_encode(int reg_enc);
 514  514  
 515  515    int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
 516  516    int prefixq_and_encode(int dst_enc, int src_enc);
 517  517  
 518  518    void prefix(Register reg);
 519  519    void prefix(Address adr);
 520  520    void prefixq(Address adr);
 521  521  
 522  522    void prefix(Address adr, Register reg,  bool byteinst = false);
 523  523    void prefixq(Address adr, Register reg);
 524  524  
 525  525    void prefix(Address adr, XMMRegister reg);
 526  526  
 527  527    void prefetch_prefix(Address src);
 528  528  
 529  529    // Helper functions for groups of instructions
 530  530    void emit_arith_b(int op1, int op2, Register dst, int imm8);
 531  531  
 532  532    void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 533  533    // only 32bit??
 534  534    void emit_arith(int op1, int op2, Register dst, jobject obj);
 535  535    void emit_arith(int op1, int op2, Register dst, Register src);
 536  536  
 537  537    void emit_operand(Register reg,
 538  538                      Register base, Register index, Address::ScaleFactor scale,
 539  539                      int disp,
 540  540                      RelocationHolder const& rspec,
 541  541                      int rip_relative_correction = 0);
 542  542  
 543  543    void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 544  544  
 545  545    // operands that only take the original 32bit registers
 546  546    void emit_operand32(Register reg, Address adr);
 547  547  
 548  548    void emit_operand(XMMRegister reg,
 549  549                      Register base, Register index, Address::ScaleFactor scale,
 550  550                      int disp,
 551  551                      RelocationHolder const& rspec);
 552  552  
 553  553    void emit_operand(XMMRegister reg, Address adr);
 554  554  
 555  555    void emit_operand(MMXRegister reg, Address adr);
 556  556  
 557  557    // workaround gcc (3.2.1-7) bug
 558  558    void emit_operand(Address adr, MMXRegister reg);
 559  559  
 560  560  
 561  561    // Immediate-to-memory forms
 562  562    void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
 563  563  
 564  564    void emit_farith(int b1, int b2, int i);
 565  565  
 566  566  
 567  567   protected:
 568  568    #ifdef ASSERT
 569  569    void check_relocation(RelocationHolder const& rspec, int format);
 570  570    #endif
 571  571  
 572  572    inline void emit_long64(jlong x);
 573  573  
 574  574    void emit_data(jint data, relocInfo::relocType    rtype, int format);
 575  575    void emit_data(jint data, RelocationHolder const& rspec, int format);
 576  576    void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
 577  577    void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
 578  578  
 579  579  
 580  580    bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
 581  581  
 582  582    // These are all easily abused and hence protected
 583  583  
 584  584    // 32BIT ONLY SECTION
 585  585  #ifndef _LP64
 586  586    // Make these disappear in 64bit mode since they would never be correct
 587  587    void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
 588  588    void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 589  589  
 590  590    void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 591  591    void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
 592  592  
 593  593    void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
 594  594  #else
 595  595    // 64BIT ONLY SECTION
 596  596    void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
 597  597  
 598  598    void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
 599  599    void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
 600  600  
 601  601    void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
 602  602    void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
 603  603  #endif // _LP64
 604  604  
 605  605    // These are unique in that we are ensured by the caller that the 32bit
 606  606    // relative in these instructions will always be able to reach the potentially
 607  607    // 64bit address described by entry. Since they can take a 64bit address they
 608  608    // don't have the 32 suffix like the other instructions in this class.
 609  609  
 610  610    void call_literal(address entry, RelocationHolder const& rspec);
 611  611    void jmp_literal(address entry, RelocationHolder const& rspec);
 612  612  
 613  613    // Avoid using directly section
 614  614    // Instructions in this section are actually usable by anyone without danger
 615  615    // of failure but have performance issues that are addressed by enhanced
 616  616    // instructions which will do the proper thing based on the particular cpu.
 617  617    // We protect them because we don't trust you...
 618  618  
 619  619    // Don't use next inc() and dec() methods directly. INC & DEC instructions
 620  620    // could cause a partial flag stall since they don't set CF flag.
 621  621    // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
 622  622    // which call inc() & dec() or add() & sub() in accordance with
 623  623    // the product flag UseIncDec value.
 624  624  
 625  625    void decl(Register dst);
 626  626    void decl(Address dst);
 627  627    void decq(Register dst);
 628  628    void decq(Address dst);
 629  629  
 630  630    void incl(Register dst);
 631  631    void incl(Address dst);
 632  632    void incq(Register dst);
 633  633    void incq(Address dst);
 634  634  
 635  635    // New cpus require use of movsd and movss to avoid partial register stall
 636  636    // when loading from memory. But for old Opteron use movlpd instead of movsd.
 637  637    // The selection is done in MacroAssembler::movdbl() and movflt().
 638  638  
 639  639    // Move Scalar Single-Precision Floating-Point Values
 640  640    void movss(XMMRegister dst, Address src);
 641  641    void movss(XMMRegister dst, XMMRegister src);
 642  642    void movss(Address dst, XMMRegister src);
 643  643  
 644  644    // Move Scalar Double-Precision Floating-Point Values
 645  645    void movsd(XMMRegister dst, Address src);
 646  646    void movsd(XMMRegister dst, XMMRegister src);
 647  647    void movsd(Address dst, XMMRegister src);
 648  648    void movlpd(XMMRegister dst, Address src);
 649  649  
 650  650    // New cpus require use of movaps and movapd to avoid partial register stall
 651  651    // when moving between registers.
 652  652    void movaps(XMMRegister dst, XMMRegister src);
 653  653    void movapd(XMMRegister dst, XMMRegister src);
 654  654  
 655  655    // End avoid using directly
 656  656  
 657  657  
 658  658    // Instruction prefixes
 659  659    void prefix(Prefix p);
 660  660  
 661  661    public:
 662  662  
 663  663    // Creation
 664  664    Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
 665  665  
 666  666    // Decoding
 667  667    static address locate_operand(address inst, WhichOperand which);
 668  668    static address locate_next_instruction(address inst);
 669  669  
 670  670    // Utilities
 671  671  
 672  672  #ifdef _LP64
 673  673   static bool is_simm(int64_t x, int nbits) { return -( CONST64(1) << (nbits-1) )  <= x   &&   x  <  ( CONST64(1) << (nbits-1) ); }
 674  674   static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; }
 675  675  #else
 676  676   static bool is_simm(int32_t x, int nbits) { return -( 1 << (nbits-1) )  <= x   &&   x  <  ( 1 << (nbits-1) ); }
 677  677   static bool is_simm32(int32_t x) { return true; }
 678  678  #endif // LP64
 679  679  
 680  680    // Generic instructions
 681  681    // Does 32bit or 64bit as needed for the platform. In some sense these
 682  682    // belong in macro assembler but there is no need for both varieties to exist
 683  683  
 684  684    void lea(Register dst, Address src);
 685  685  
 686  686    void mov(Register dst, Register src);
 687  687  
 688  688    void pusha();
 689  689    void popa();
 690  690  
 691  691    void pushf();
 692  692    void popf();
 693  693  
 694  694    void push(int32_t imm32);
 695  695  
 696  696    void push(Register src);
 697  697  
 698  698    void pop(Register dst);
 699  699  
 700  700    // These are dummies to prevent surprise implicit conversions to Register
 701  701    void push(void* v);
 702  702    void pop(void* v);
 703  703  
 704  704  
 705  705    // These do register sized moves/scans
 706  706    void rep_mov();
 707  707    void rep_set();
 708  708    void repne_scan();
 709  709  #ifdef _LP64
 710  710    void repne_scanl();
 711  711  #endif
 712  712  
 713  713    // Vanilla instructions in lexical order
 714  714  
 715  715    void adcl(Register dst, int32_t imm32);
 716  716    void adcl(Register dst, Address src);
 717  717    void adcl(Register dst, Register src);
 718  718  
 719  719    void adcq(Register dst, int32_t imm32);
 720  720    void adcq(Register dst, Address src);
 721  721    void adcq(Register dst, Register src);
 722  722  
 723  723  
 724  724    void addl(Address dst, int32_t imm32);
 725  725    void addl(Address dst, Register src);
 726  726    void addl(Register dst, int32_t imm32);
 727  727    void addl(Register dst, Address src);
 728  728    void addl(Register dst, Register src);
 729  729  
 730  730    void addq(Address dst, int32_t imm32);
 731  731    void addq(Address dst, Register src);
 732  732    void addq(Register dst, int32_t imm32);
 733  733    void addq(Register dst, Address src);
 734  734    void addq(Register dst, Register src);
 735  735  
 736  736  
 737  737    void addr_nop_4();
 738  738    void addr_nop_5();
 739  739    void addr_nop_7();
 740  740    void addr_nop_8();
 741  741  
 742  742    // Add Scalar Double-Precision Floating-Point Values
 743  743    void addsd(XMMRegister dst, Address src);
 744  744    void addsd(XMMRegister dst, XMMRegister src);
 745  745  
 746  746    // Add Scalar Single-Precision Floating-Point Values
 747  747    void addss(XMMRegister dst, Address src);
 748  748    void addss(XMMRegister dst, XMMRegister src);
 749  749  
 750  750    void andl(Register dst, int32_t imm32);
 751  751    void andl(Register dst, Address src);
 752  752    void andl(Register dst, Register src);
 753  753  
 754  754    void andq(Register dst, int32_t imm32);
 755  755    void andq(Register dst, Address src);
 756  756    void andq(Register dst, Register src);
 757  757  
 758  758  
 759  759    // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
 760  760    void andpd(XMMRegister dst, Address src);
 761  761    void andpd(XMMRegister dst, XMMRegister src);
 762  762  
 763  763    void bsfl(Register dst, Register src);
 764  764    void bsrl(Register dst, Register src);
 765  765  
 766  766  #ifdef _LP64
 767  767    void bsfq(Register dst, Register src);
 768  768    void bsrq(Register dst, Register src);
 769  769  #endif
 770  770  
 771  771    void bswapl(Register reg);
 772  772  
 773  773    void bswapq(Register reg);
 774  774  
 775  775    void call(Label& L, relocInfo::relocType rtype);
 776  776    void call(Register reg);  // push pc; pc <- reg
 777  777    void call(Address adr);   // push pc; pc <- adr
 778  778  
 779  779    void cdql();
 780  780  
 781  781    void cdqq();
 782  782  
 783  783    void cld() { emit_byte(0xfc); }  // emits the single opcode byte 0xFC (x86 CLD, clear direction flag)
 784  784  
 785  785    void clflush(Address adr);
 786  786  
 787  787    void cmovl(Condition cc, Register dst, Register src);
 788  788    void cmovl(Condition cc, Register dst, Address src);
 789  789  
 790  790    void cmovq(Condition cc, Register dst, Register src);
 791  791    void cmovq(Condition cc, Register dst, Address src);
 792  792  
 793  793  
 794  794    void cmpb(Address dst, int imm8);
 795  795  
 796  796    void cmpl(Address dst, int32_t imm32);
 797  797  
 798  798    void cmpl(Register dst, int32_t imm32);
 799  799    void cmpl(Register dst, Register src);
 800  800    void cmpl(Register dst, Address src);
 801  801  
 802  802    void cmpq(Address dst, int32_t imm32);
 803  803    void cmpq(Address dst, Register src);
 804  804  
 805  805    void cmpq(Register dst, int32_t imm32);
 806  806    void cmpq(Register dst, Register src);
 807  807    void cmpq(Register dst, Address src);
 808  808  
 809  809    // these are dummies used to catch attempting to convert NULL to Register
 810  810    void cmpl(Register dst, void* junk); // dummy
 811  811    void cmpq(Register dst, void* junk); // dummy
 812  812  
 813  813    void cmpw(Address dst, int imm16);
 814  814  
 815  815    void cmpxchg8 (Address adr);
 816  816  
 817  817    void cmpxchgl(Register reg, Address adr);
 818  818  
 819  819    void cmpxchgq(Register reg, Address adr);
 820  820  
 821  821    // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
 822  822    void comisd(XMMRegister dst, Address src);
 823  823  
 824  824    // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
 825  825    void comiss(XMMRegister dst, Address src);
 826  826  
 827  827    // Identify processor type and features: emits the two-byte CPUID opcode (0x0F 0xA2).
 828  828    void cpuid() {
 829  829      emit_byte(0x0F);
 830  830      emit_byte(0xA2);
 831  831    }
 832  832  
 833  833    // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
 834  834    void cvtsd2ss(XMMRegister dst, XMMRegister src);
 835  835  
 836  836    // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
 837  837    void cvtsi2sdl(XMMRegister dst, Register src);
 838  838    void cvtsi2sdq(XMMRegister dst, Register src);
 839  839  
 840  840    // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
 841  841    void cvtsi2ssl(XMMRegister dst, Register src);
 842  842    void cvtsi2ssq(XMMRegister dst, Register src);
 843  843  
 844  844    // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
 845  845    void cvtdq2pd(XMMRegister dst, XMMRegister src);
 846  846  
 847  847    // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
 848  848    void cvtdq2ps(XMMRegister dst, XMMRegister src);
 849  849  
 850  850    // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
 851  851    void cvtss2sd(XMMRegister dst, XMMRegister src);
 852  852  
 853  853    // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
 854  854    void cvttsd2sil(Register dst, Address src);
 855  855    void cvttsd2sil(Register dst, XMMRegister src);
 856  856    void cvttsd2siq(Register dst, XMMRegister src);
 857  857  
 858  858    // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
 859  859    void cvttss2sil(Register dst, XMMRegister src);
 860  860    void cvttss2siq(Register dst, XMMRegister src);
 861  861  
 862  862    // Divide Scalar Double-Precision Floating-Point Values
 863  863    void divsd(XMMRegister dst, Address src);
 864  864    void divsd(XMMRegister dst, XMMRegister src);
 865  865  
 866  866    // Divide Scalar Single-Precision Floating-Point Values
 867  867    void divss(XMMRegister dst, Address src);
 868  868    void divss(XMMRegister dst, XMMRegister src);
 869  869  
 870  870    void emms();
 871  871  
 872  872    void fabs();
 873  873  
 874  874    void fadd(int i);
 875  875  
 876  876    void fadd_d(Address src);
 877  877    void fadd_s(Address src);
 878  878  
 879  879    // "Alternate" versions of x87 instructions place result down in FPU
 880  880    // stack instead of on TOS
 881  881  
 882  882    void fadda(int i); // "alternate" fadd
 883  883    void faddp(int i = 1);
 884  884  
 885  885    void fchs();
 886  886  
 887  887    void fcom(int i);
 888  888  
 889  889    void fcomp(int i = 1);
 890  890    void fcomp_d(Address src);
 891  891    void fcomp_s(Address src);
 892  892  
 893  893    void fcompp();
 894  894  
 895  895    void fcos();
 896  896  
 897  897    void fdecstp();
 898  898  
 899  899    void fdiv(int i);
 900  900    void fdiv_d(Address src);
 901  901    void fdivr_s(Address src);
 902  902    void fdiva(int i);  // "alternate" fdiv
 903  903    void fdivp(int i = 1);
 904  904  
 905  905    void fdivr(int i);
 906  906    void fdivr_d(Address src);
 907  907    void fdiv_s(Address src);
 908  908  
 909  909    void fdivra(int i); // "alternate" reversed fdiv
 910  910  
 911  911    void fdivrp(int i = 1);
 912  912  
 913  913    void ffree(int i = 0);
 914  914  
 915  915    void fild_d(Address adr);
 916  916    void fild_s(Address adr);
 917  917  
 918  918    void fincstp();
 919  919  
 920  920    void finit();
 921  921  
 922  922    void fist_s (Address adr);
 923  923    void fistp_d(Address adr);
 924  924    void fistp_s(Address adr);
 925  925  
 926  926    void fld1();
 927  927  
 928  928    void fld_d(Address adr);
 929  929    void fld_s(Address adr);
 930  930    void fld_s(int index);
 931  931    void fld_x(Address adr);  // extended-precision (80-bit) format
 932  932  
 933  933    void fldcw(Address src);
 934  934  
 935  935    void fldenv(Address src);
 936  936  
 937  937    void fldlg2();
 938  938  
 939  939    void fldln2();
 940  940  
 941  941    void fldz();
 942  942  
 943  943    void flog();
 944  944    void flog10();
 945  945  
 946  946    void fmul(int i);
 947  947  
 948  948    void fmul_d(Address src);
 949  949    void fmul_s(Address src);
 950  950  
 951  951    void fmula(int i);  // "alternate" fmul
 952  952  
 953  953    void fmulp(int i = 1);
 954  954  
 955  955    void fnsave(Address dst);
 956  956  
 957  957    void fnstcw(Address src);
 958  958  
 959  959    void fnstsw_ax();
 960  960  
 961  961    void fprem();
 962  962    void fprem1();
 963  963  
 964  964    void frstor(Address src);
 965  965  
 966  966    void fsin();
 967  967  
 968  968    void fsqrt();
 969  969  
 970  970    void fst_d(Address adr);
 971  971    void fst_s(Address adr);
 972  972  
 973  973    void fstp_d(Address adr);
 974  974    void fstp_d(int index);
 975  975    void fstp_s(Address adr);
 976  976    void fstp_x(Address adr); // extended-precision (80-bit) format
 977  977  
 978  978    void fsub(int i);
 979  979    void fsub_d(Address src);
 980  980    void fsub_s(Address src);
 981  981  
 982  982    void fsuba(int i);  // "alternate" fsub
 983  983  
 984  984    void fsubp(int i = 1);
 985  985  
 986  986    void fsubr(int i);
 987  987    void fsubr_d(Address src);
 988  988    void fsubr_s(Address src);
 989  989  
 990  990    void fsubra(int i); // "alternate" reversed fsub
 991  991  
 992  992    void fsubrp(int i = 1);
 993  993  
 994  994    void ftan();
 995  995  
 996  996    void ftst();
 997  997  
 998  998    void fucomi(int i = 1);
 999  999    void fucomip(int i = 1);
1000 1000  
1001 1001    void fwait();
1002 1002  
1003 1003    void fxch(int i = 1);
1004 1004  
1005 1005    void fxrstor(Address src);
1006 1006  
1007 1007    void fxsave(Address dst);
1008 1008  
1009 1009    void fyl2x();
1010 1010  
1011 1011    void hlt();
1012 1012  
1013 1013    void idivl(Register src);
1014 1014    void divl(Register src); // Unsigned division
1015 1015  
1016 1016    void idivq(Register src);
1017 1017  
1018 1018    void imull(Register dst, Register src);
1019 1019    void imull(Register dst, Register src, int value);
1020 1020  
1021 1021    void imulq(Register dst, Register src);
1022 1022    void imulq(Register dst, Register src, int value);
1023 1023  
1024 1024  
1025 1025    // jcc is the generic conditional branch generator to run-
1026 1026    // time routines, jcc is used for branches to labels. jcc
1027 1027    // takes a branch opcode (cc) and a label (L) and generates
1028 1028    // either a backward branch or a forward branch and links it
1029 1029    // to the label fixup chain. Usage:
1030 1030    //
1031 1031    // Label L;      // unbound label
1032 1032    // jcc(cc, L);   // forward branch to unbound label
1033 1033    // bind(L);      // bind label to the current pc
1034 1034    // jcc(cc, L);   // backward branch to bound label
1035 1035    // bind(L);      // illegal: a label may be bound only once
1036 1036    //
1037 1037    // Note: The same Label can be used for forward and backward branches
1038 1038    // but it may be bound only once.
1039 1039  
1040 1040    void jcc(Condition cc, Label& L,
1041 1041             relocInfo::relocType rtype = relocInfo::none);
1042 1042  
1043 1043    // Conditional jump to an 8-bit offset to L.
1044 1044    // WARNING: be very careful using this for forward jumps.  If the label is
1045 1045    // not bound within an 8-bit offset of this instruction, a run-time error
1046 1046    // will occur.
1047 1047    void jccb(Condition cc, Label& L);
1048 1048  
1049 1049    void jmp(Address entry);    // pc <- entry
1050 1050  
1051 1051    // Label operations & relative jumps (PPUM Appendix D)
1052 1052    void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none);   // unconditional jump to L
1053 1053  
1054 1054    void jmp(Register entry); // pc <- entry
1055 1055  
1056 1056    // Unconditional 8-bit offset jump to L.
1057 1057    // WARNING: be very careful using this for forward jumps.  If the label is
1058 1058    // not bound within an 8-bit offset of this instruction, a run-time error
1059 1059    // will occur.
1060 1060    void jmpb(Label& L);
1061 1061  
1062 1062    void ldmxcsr( Address src );
1063 1063  
1064 1064    void leal(Register dst, Address src);
1065 1065  
1066 1066    void leaq(Register dst, Address src);
1067 1067  
1068 1068    void lfence() {  // emits the three-byte x86 LFENCE (load fence) encoding: 0x0F 0xAE 0xE8
1069 1069      emit_byte(0x0F);
1070 1070      emit_byte(0xAE);
1071 1071      emit_byte(0xE8);
1072 1072    }
1073 1073  
1074 1074    void lock();
1075 1075  
1076 1076    void lzcntl(Register dst, Register src);
1077 1077  
1078 1078  #ifdef _LP64
1079 1079    void lzcntq(Register dst, Register src);
1080 1080  #endif
1081 1081  
1082 1082    enum Membar_mask_bits {  // one distinct bit per ordering constraint; membar() tests the mask with '&'
1083 1083      StoreStore = 1 << 3,
1084 1084      LoadStore  = 1 << 2,
1085 1085      StoreLoad  = 1 << 1,
1086 1086      LoadLoad   = 1 << 0
1087 1087    };
1088 1088  
1089 1089    // Serializes memory and blows flags
             //
             // Code is emitted only when os::is_MP() is true AND order_constraint has the
             // StoreLoad bit set; in every other case this emits nothing. The other
             // Membar_mask_bits values are accepted but generate no code here.
1090 1090    void membar(Membar_mask_bits order_constraint) {
1091 1091      if (os::is_MP()) {
1092 1092        // We only have to handle StoreLoad
1093 1093        if (order_constraint & StoreLoad) {
1094 1094          // All usable chips support "locked" instructions which suffice
1095 1095          // as barriers, and are much faster than the alternative of
1096 1096          // using cpuid instruction. We use here a locked add [esp],0.
1097 1097          // This is conveniently otherwise a no-op except for blowing
1098 1098          // flags.
1099 1099          // Any change to this code may need to revisit other places in
1100 1100          // the code where this idiom is used, in particular the
1101 1101          // orderAccess code.
1102 1102          lock();
1103 1103          addl(Address(rsp, 0), 0);// Assert the lock# signal here
1104 1104        }
1105 1105      }
1106 1106    }
1107 1107  
1108 1108    void mfence();
1109 1109  
1110 1110    // Moves
1111 1111  
1112 1112    void mov64(Register dst, int64_t imm64);
1113 1113  
1114 1114    void movb(Address dst, Register src);
1115 1115    void movb(Address dst, int imm8);
1116 1116    void movb(Register dst, Address src);
1117 1117  
1118 1118    void movdl(XMMRegister dst, Register src);
1119 1119    void movdl(Register dst, XMMRegister src);
1120 1120  
1121 1121    // Move Quadword (between a general-purpose register and an XMM register)
1122 1122    void movdq(XMMRegister dst, Register src);
1123 1123    void movdq(Register dst, XMMRegister src);
1124 1124  
1125 1125    // Move Aligned Double Quadword
1126 1126    void movdqa(Address     dst, XMMRegister src);
1127 1127    void movdqa(XMMRegister dst, Address src);
1128 1128    void movdqa(XMMRegister dst, XMMRegister src);
1129 1129  
1130 1130    // Move Unaligned Double Quadword
1131 1131    void movdqu(Address     dst, XMMRegister src);
1132 1132    void movdqu(XMMRegister dst, Address src);
1133 1133    void movdqu(XMMRegister dst, XMMRegister src);
1134 1134  
1135 1135    void movl(Register dst, int32_t imm32);
1136 1136    void movl(Address dst, int32_t imm32);
1137 1137    void movl(Register dst, Register src);
1138 1138    void movl(Register dst, Address src);
1139 1139    void movl(Address dst, Register src);
1140 1140  
1141 1141    // These dummies prevent using movl from converting a zero (like NULL) into Register
1142 1142    // by giving the compiler two choices it can't resolve
1143 1143  
1144 1144    void movl(Address  dst, void* junk);
1145 1145    void movl(Register dst, void* junk);
1146 1146  
1147 1147  #ifdef _LP64
1148 1148    void movq(Register dst, Register src);
1149 1149    void movq(Register dst, Address src);
1150 1150    void movq(Address dst, Register src);
1151 1151  #endif
1152 1152  
1153 1153    void movq(Address     dst, MMXRegister src );
1154 1154    void movq(MMXRegister dst, Address src );
1155 1155  
1156 1156  #ifdef _LP64
1157 1157    // These dummies prevent using movq from converting a zero (like NULL) into Register
1158 1158    // by giving the compiler two choices it can't resolve
1159 1159  
1160 1160    void movq(Address  dst, void* dummy);
1161 1161    void movq(Register dst, void* dummy);
1162 1162  #endif
1163 1163  
1164 1164    // Move Quadword
1165 1165    void movq(Address     dst, XMMRegister src);
1166 1166    void movq(XMMRegister dst, Address src);
1167 1167  
1168 1168    void movsbl(Register dst, Address src);
1169 1169    void movsbl(Register dst, Register src);
1170 1170  
1171 1171  #ifdef _LP64
1172 1172    void movsbq(Register dst, Address src);
1173 1173    void movsbq(Register dst, Register src);
1174 1174  
1175 1175    // Move signed 32bit immediate to 64bit extending sign
1176 1176    void movslq(Address dst, int32_t imm64);
1177 1177    void movslq(Register dst, int32_t imm64);
1178 1178  
1179 1179    void movslq(Register dst, Address src);
1180 1180    void movslq(Register dst, Register src);
1181 1181    void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1182 1182  #endif
1183 1183  
1184 1184    void movswl(Register dst, Address src);
1185 1185    void movswl(Register dst, Register src);
1186 1186  
1187 1187  #ifdef _LP64
1188 1188    void movswq(Register dst, Address src);
1189 1189    void movswq(Register dst, Register src);
1190 1190  #endif
1191 1191  
1192 1192    void movw(Address dst, int imm16);
1193 1193    void movw(Register dst, Address src);
1194 1194    void movw(Address dst, Register src);
1195 1195  
1196 1196    void movzbl(Register dst, Address src);
1197 1197    void movzbl(Register dst, Register src);
1198 1198  
1199 1199  #ifdef _LP64
1200 1200    void movzbq(Register dst, Address src);
1201 1201    void movzbq(Register dst, Register src);
1202 1202  #endif
1203 1203  
1204 1204    void movzwl(Register dst, Address src);
1205 1205    void movzwl(Register dst, Register src);
1206 1206  
1207 1207  #ifdef _LP64
1208 1208    void movzwq(Register dst, Address src);
1209 1209    void movzwq(Register dst, Register src);
1210 1210  #endif
1211 1211  
1212 1212    void mull(Address src);
1213 1213    void mull(Register src);
1214 1214  
1215 1215    // Multiply Scalar Double-Precision Floating-Point Values
1216 1216    void mulsd(XMMRegister dst, Address src);
1217 1217    void mulsd(XMMRegister dst, XMMRegister src);
1218 1218  
1219 1219    // Multiply Scalar Single-Precision Floating-Point Values
1220 1220    void mulss(XMMRegister dst, Address src);
1221 1221    void mulss(XMMRegister dst, XMMRegister src);
1222 1222  
1223 1223    void negl(Register dst);
1224 1224  
1225 1225  #ifdef _LP64
1226 1226    void negq(Register dst);
1227 1227  #endif
1228 1228  
1229 1229    void nop(int i = 1);
1230 1230  
1231 1231    void notl(Register dst);
1232 1232  
1233 1233  #ifdef _LP64
1234 1234    void notq(Register dst);
1235 1235  #endif
1236 1236  
1237 1237    void orl(Address dst, int32_t imm32);
1238 1238    void orl(Register dst, int32_t imm32);
1239 1239    void orl(Register dst, Address src);
1240 1240    void orl(Register dst, Register src);
1241 1241  
1242 1242    void orq(Address dst, int32_t imm32);
1243 1243    void orq(Register dst, int32_t imm32);
1244 1244    void orq(Register dst, Address src);
1245 1245    void orq(Register dst, Register src);
1246 1246  
1247 1247    // SSE4.2 string instructions
1248 1248    void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1249 1249    void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1250 1250  
1251 1251  #ifndef _LP64 // no 32bit push/pop on amd64
1252 1252    void popl(Address dst);
1253 1253  #endif
1254 1254  
1255 1255  #ifdef _LP64
1256 1256    void popq(Address dst);
1257 1257  #endif
1258 1258  
1259 1259    void popcntl(Register dst, Address src);
1260 1260    void popcntl(Register dst, Register src);
1261 1261  
1262 1262  #ifdef _LP64
1263 1263    void popcntq(Register dst, Address src);
1264 1264    void popcntq(Register dst, Register src);
1265 1265  #endif
1266 1266  
1267 1267    // Prefetches (SSE, SSE2, 3DNOW only)
1268 1268  
1269 1269    void prefetchnta(Address src);
1270 1270    void prefetchr(Address src);
1271 1271    void prefetcht0(Address src);
1272 1272    void prefetcht1(Address src);
1273 1273    void prefetcht2(Address src);
1274 1274    void prefetchw(Address src);
1275 1275  
1276 1276    // Shuffle Packed Doublewords
1277 1277    void pshufd(XMMRegister dst, XMMRegister src, int mode);
1278 1278    void pshufd(XMMRegister dst, Address src,     int mode);
1279 1279  
1280 1280    // Shuffle Packed Low Words
1281 1281    void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1282 1282    void pshuflw(XMMRegister dst, Address src,     int mode);
1283 1283  
1284 1284    // Shift Right Logical Quadword Immediate
1285 1285    void psrlq(XMMRegister dst, int shift);
1286 1286  
1287 1287    // Logical Compare Double Quadword
1288 1288    void ptest(XMMRegister dst, XMMRegister src);
1289 1289    void ptest(XMMRegister dst, Address src);
1290 1290  
1291 1291    // Interleave Low Bytes
1292 1292    void punpcklbw(XMMRegister dst, XMMRegister src);
1293 1293  
1294 1294  #ifndef _LP64 // no 32bit push/pop on amd64
1295 1295    void pushl(Address src);
1296 1296  #endif
1297 1297  
1298 1298    void pushq(Address src);
1299 1299  
1300 1300    // Xor Packed Byte Integer Values
1301 1301    void pxor(XMMRegister dst, Address src);
1302 1302    void pxor(XMMRegister dst, XMMRegister src);
1303 1303  
1304 1304    void rcll(Register dst, int imm8);
1305 1305  
1306 1306    void rclq(Register dst, int imm8);
1307 1307  
1308 1308    void ret(int imm16);
1309 1309  
1310 1310    void sahf();
1311 1311  
1312 1312    void sarl(Register dst, int imm8);
1313 1313    void sarl(Register dst);
1314 1314  
1315 1315    void sarq(Register dst, int imm8);
1316 1316    void sarq(Register dst);
1317 1317  
1318 1318    void sbbl(Address dst, int32_t imm32);
1319 1319    void sbbl(Register dst, int32_t imm32);
1320 1320    void sbbl(Register dst, Address src);
1321 1321    void sbbl(Register dst, Register src);
1322 1322  
1323 1323    void sbbq(Address dst, int32_t imm32);
1324 1324    void sbbq(Register dst, int32_t imm32);
1325 1325    void sbbq(Register dst, Address src);
1326 1326    void sbbq(Register dst, Register src);
1327 1327  
1328 1328    void setb(Condition cc, Register dst);
1329 1329  
1330 1330    void shldl(Register dst, Register src);
1331 1331  
1332 1332    void shll(Register dst, int imm8);
1333 1333    void shll(Register dst);
1334 1334  
1335 1335    void shlq(Register dst, int imm8);
1336 1336    void shlq(Register dst);
1337 1337  
1338 1338    void shrdl(Register dst, Register src);
1339 1339  
1340 1340    void shrl(Register dst, int imm8);
1341 1341    void shrl(Register dst);

↓ open down ↓

1341 lines elided

↑ open up ↑

1342 1342  
1343 1343    void shrq(Register dst, int imm8);
1344 1344    void shrq(Register dst);
1345 1345  
1346 1346    void smovl(); // QQQ generic?
1347 1347  
1348 1348    // Compute Square Root of Scalar Double-Precision Floating-Point Value
1349 1349    void sqrtsd(XMMRegister dst, Address src);
1350 1350    void sqrtsd(XMMRegister dst, XMMRegister src);
1351 1351  
     1352 +  // Compute Square Root of Scalar Single-Precision Floating-Point Value
     1353 +  void sqrtss(XMMRegister dst, Address src);
     1354 +  void sqrtss(XMMRegister dst, XMMRegister src);
     1355 +
1352 1356    void std() { emit_byte(0xfd); }  // emits the single opcode byte 0xFD (x86 STD, set direction flag)
1353 1357  
1354 1358    void stmxcsr( Address dst );
1355 1359  
1356 1360    void subl(Address dst, int32_t imm32);
1357 1361    void subl(Address dst, Register src);
1358 1362    void subl(Register dst, int32_t imm32);
1359 1363    void subl(Register dst, Address src);
1360 1364    void subl(Register dst, Register src);
1361 1365

1362 1366    void subq(Address dst, int32_t imm32);
1363 1367    void subq(Address dst, Register src);
1364 1368    void subq(Register dst, int32_t imm32);
1365 1369    void subq(Register dst, Address src);
1366 1370    void subq(Register dst, Register src);
1367 1371  
1368 1372  
1369 1373    // Subtract Scalar Double-Precision Floating-Point Values
1370 1374    void subsd(XMMRegister dst, Address src);
1371 1375    void subsd(XMMRegister dst, XMMRegister src);
1372 1376  
1373 1377    // Subtract Scalar Single-Precision Floating-Point Values
1374 1378    void subss(XMMRegister dst, Address src);
1375 1379    void subss(XMMRegister dst, XMMRegister src);
1376 1380  
1377 1381    void testb(Register dst, int imm8);
1378 1382  
1379 1383    void testl(Register dst, int32_t imm32);
1380 1384    void testl(Register dst, Register src);
1381 1385    void testl(Register dst, Address src);
1382 1386  
1383 1387    void testq(Register dst, int32_t imm32);
1384 1388    void testq(Register dst, Register src);
1385 1389  
1386 1390  
1387 1391    // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1388 1392    void ucomisd(XMMRegister dst, Address src);
1389 1393    void ucomisd(XMMRegister dst, XMMRegister src);
1390 1394  
1391 1395    // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1392 1396    void ucomiss(XMMRegister dst, Address src);
1393 1397    void ucomiss(XMMRegister dst, XMMRegister src);
1394 1398  
1395 1399    void xaddl(Address dst, Register src);
1396 1400  
1397 1401    void xaddq(Address dst, Register src);
1398 1402  
1399 1403    void xchgl(Register reg, Address adr);
1400 1404    void xchgl(Register dst, Register src);
1401 1405  
1402 1406    void xchgq(Register reg, Address adr);
1403 1407    void xchgq(Register dst, Register src);
1404 1408  
1405 1409    void xorl(Register dst, int32_t imm32);
1406 1410    void xorl(Register dst, Address src);
1407 1411    void xorl(Register dst, Register src);
1408 1412  
1409 1413    void xorq(Register dst, Address src);
1410 1414    void xorq(Register dst, Register src);
1411 1415  
1412 1416    // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1413 1417    void xorpd(XMMRegister dst, Address src);
1414 1418    void xorpd(XMMRegister dst, XMMRegister src);
1415 1419  
1416 1420    // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1417 1421    void xorps(XMMRegister dst, Address src);
1418 1422    void xorps(XMMRegister dst, XMMRegister src);
1419 1423  
1420 1424    void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1421 1425  };
1422 1426  
1423 1427  
1424 1428  // MacroAssembler extends Assembler by frequently used macros.
1425 1429  //
1426 1430  // Instructions for which a 'better' code sequence exists depending
1427 1431  // on arguments should also go in here.
1428 1432  
1429 1433  class MacroAssembler: public Assembler {
1430 1434    friend class LIR_Assembler;
1431 1435    friend class Runtime1;      // as_Address()
1432 1436   protected:
1433 1437  
1434 1438    Address as_Address(AddressLiteral adr);
1435 1439    Address as_Address(ArrayAddress adr);
1436 1440  
1437 1441    // Support for VM calls
1438 1442    //
1439 1443    // This is the base routine called by the different versions of call_VM_leaf. The interpreter
1440 1444    // may customize this version by overriding it for its purposes (e.g., to save/restore
1441 1445    // additional registers when doing a VM call).
1442 1446  #ifdef CC_INTERP
1443 1447    // c++ interpreter never wants to use interp_masm version of call_VM,
             // so VIRTUAL expands to nothing and the routines declared with it are non-virtual.
1444 1448    #define VIRTUAL
1445 1449  #else
             // Otherwise VIRTUAL expands to 'virtual', making those routines overridable.
1446 1450    #define VIRTUAL virtual
1447 1451  #endif
1448 1452  
1449 1453    VIRTUAL void call_VM_leaf_base(
1450 1454      address entry_point,               // the entry point
1451 1455      int     number_of_arguments        // the number of arguments to pop after the call
1452 1456    );
1453 1457  
1454 1458    // This is the base routine called by the different versions of call_VM. The interpreter
1455 1459    // may customize this version by overriding it for its purposes (e.g., to save/restore
1456 1460    // additional registers when doing a VM call).
1457 1461    //
1458 1462    // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
1459 1463    // returns the register which contains the thread upon return. If a thread register has been
1460 1464    // specified, the return value will correspond to that register. If no last_java_sp is specified
1461 1465    // (noreg) then rsp will be used instead.
1462 1466    VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
1463 1467      Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
1464 1468      Register java_thread,              // the thread if computed before     ; use noreg otherwise
1465 1469      Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
1466 1470      address  entry_point,              // the entry point
1467 1471      int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
1468 1472      bool     check_exceptions          // whether to check for pending exceptions after return
1469 1473    );
1470 1474  
1471 1475    // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
1472 1476    // The implementation is only non-empty for the InterpreterMacroAssembler,
1473 1477    // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
1474 1478    virtual void check_and_handle_popframe(Register java_thread);
1475 1479    virtual void check_and_handle_earlyret(Register java_thread);
1476 1480  
1477 1481    void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
1478 1482  
1479 1483    // helpers for FPU flag access
1480 1484    // tmp is a temporary register, if none is available use noreg
1481 1485    void save_rax   (Register tmp);
1482 1486    void restore_rax(Register tmp);
1483 1487  
1484 1488   public:
1485 1489    MacroAssembler(CodeBuffer* code) : Assembler(code) {}  // forwards 'code' to the Assembler base; no other initialization
1486 1490  
1487 1491    // Support for NULL-checks
1488 1492    //
1489 1493    // Generates code that causes a NULL OS exception if the content of reg is NULL.
1490 1494    // If the accessed location is M[reg + offset] and the offset is known, provide the
1491 1495    // offset. No explicit code generation is needed if the offset is within a certain
1492 1496    // range (0 <= offset <= page_size).
1493 1497  
1494 1498    void null_check(Register reg, int offset = -1);
1495 1499    static bool needs_explicit_null_check(intptr_t offset);
1496 1500  
1497 1501    // Required platform-specific helpers for Label::patch_instructions.
1498 1502    // They _shadow_ the declarations in AbstractAssembler, which are undefined.
1499 1503    void pd_patch_instruction(address branch, address target);
1500 1504  #ifndef PRODUCT
1501 1505    static void pd_print_patched_instruction(address branch);
1502 1506  #endif
1503 1507  
1504 1508    // The following 4 methods return the offset of the appropriate move instruction
1505 1509  
1506 1510    // Support for fast byte/short loading with zero extension (depending on particular CPU)
1507 1511    int load_unsigned_byte(Register dst, Address src);
1508 1512    int load_unsigned_short(Register dst, Address src);
1509 1513  
1510 1514    // Support for fast byte/short loading with sign extension (depending on particular CPU)
1511 1515    int load_signed_byte(Register dst, Address src);
1512 1516    int load_signed_short(Register dst, Address src);
1513 1517  
1514 1518    // Support for sign-extension (hi:lo = extend_sign(lo))
1515 1519    void extend_sign(Register hi, Register lo);
1516 1520  
1517 1521    // Loading values by size and signed-ness
1518 1522    void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
1519 1523  
1520 1524    // Support for inc/dec with optimal instruction selection depending on value
1521 1525  
1522 1526    void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
1523 1527    void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
1524 1528  
1525 1529    void decrementl(Address dst, int value = 1);
1526 1530    void decrementl(Register reg, int value = 1);
1527 1531  
1528 1532    void decrementq(Register reg, int value = 1);
1529 1533    void decrementq(Address dst, int value = 1);
1530 1534  
1531 1535    void incrementl(Address dst, int value = 1);
1532 1536    void incrementl(Register reg, int value = 1);
1533 1537  
1534 1538    void incrementq(Register reg, int value = 1);
1535 1539    void incrementq(Address dst, int value = 1);
1536 1540  
1537 1541  
1538 1542    // Support optimal SSE move instructions.
1539 1543    void movflt(XMMRegister dst, XMMRegister src) {
1540 1544      if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
1541 1545      else                       { movss (dst, src); return; }
1542 1546    }
1543 1547    void movflt(XMMRegister dst, Address src) { movss(dst, src); }
1544 1548    void movflt(XMMRegister dst, AddressLiteral src);
1545 1549    void movflt(Address dst, XMMRegister src) { movss(dst, src); }
1546 1550  
1547 1551    void movdbl(XMMRegister dst, XMMRegister src) {
1548 1552      if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
1549 1553      else                       { movsd (dst, src); return; }
1550 1554    }
1551 1555  
1552 1556    void movdbl(XMMRegister dst, AddressLiteral src);
1553 1557  
1554 1558    void movdbl(XMMRegister dst, Address src) {
1555 1559      if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
1556 1560      else                         { movlpd(dst, src); return; }
1557 1561    }
1558 1562    void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
1559 1563  
1560 1564    void incrementl(AddressLiteral dst);
1561 1565    void incrementl(ArrayAddress dst);
1562 1566  
1563 1567    // Alignment
1564 1568    void align(int modulus);
1565 1569  
1566 1570    // Misc
1567 1571    void fat_nop(); // 5 byte nop
1568 1572  
1569 1573    // Stack frame creation/removal
1570 1574    void enter();
1571 1575    void leave();
1572 1576  
1573 1577    // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
1574 1578    // The pointer will be loaded into the thread register.
1575 1579    void get_thread(Register thread);
1576 1580  
1577 1581  
1578 1582    // Support for VM calls
1579 1583    //
1580 1584    // It is imperative that all calls into the VM are handled via the call_VM macros.
1581 1585    // They make sure that the stack linkage is setup correctly. call_VM's correspond
1582 1586    // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
1583 1587  
1584 1588  
1585 1589    void call_VM(Register oop_result,
1586 1590                 address entry_point,
1587 1591                 bool check_exceptions = true);
1588 1592    void call_VM(Register oop_result,
1589 1593                 address entry_point,
1590 1594                 Register arg_1,
1591 1595                 bool check_exceptions = true);
1592 1596    void call_VM(Register oop_result,
1593 1597                 address entry_point,
1594 1598                 Register arg_1, Register arg_2,
1595 1599                 bool check_exceptions = true);
1596 1600    void call_VM(Register oop_result,
1597 1601                 address entry_point,
1598 1602                 Register arg_1, Register arg_2, Register arg_3,
1599 1603                 bool check_exceptions = true);
1600 1604  
1601 1605    // Overloadings with last_Java_sp
1602 1606    void call_VM(Register oop_result,
1603 1607                 Register last_java_sp,
1604 1608                 address entry_point,
1605 1609                 int number_of_arguments = 0,
1606 1610                 bool check_exceptions = true);
1607 1611    void call_VM(Register oop_result,
1608 1612                 Register last_java_sp,
1609 1613                 address entry_point,
1610 1614                 Register arg_1, bool
1611 1615                 check_exceptions = true);
1612 1616    void call_VM(Register oop_result,
1613 1617                 Register last_java_sp,
1614 1618                 address entry_point,
1615 1619                 Register arg_1, Register arg_2,
1616 1620                 bool check_exceptions = true);
1617 1621    void call_VM(Register oop_result,
1618 1622                 Register last_java_sp,
1619 1623                 address entry_point,
1620 1624                 Register arg_1, Register arg_2, Register arg_3,
1621 1625                 bool check_exceptions = true);
1622 1626  
1623 1627    void call_VM_leaf(address entry_point,
1624 1628                      int number_of_arguments = 0);
1625 1629    void call_VM_leaf(address entry_point,
1626 1630                      Register arg_1);
1627 1631    void call_VM_leaf(address entry_point,
1628 1632                      Register arg_1, Register arg_2);
1629 1633    void call_VM_leaf(address entry_point,
1630 1634                      Register arg_1, Register arg_2, Register arg_3);
1631 1635  
1632 1636    // last Java Frame (fills frame anchor)
1633 1637    void set_last_Java_frame(Register thread,
1634 1638                             Register last_java_sp,
1635 1639                             Register last_java_fp,
1636 1640                             address last_java_pc);
1637 1641  
1638 1642    // thread in the default location (r15_thread on 64bit)
1639 1643    void set_last_Java_frame(Register last_java_sp,
1640 1644                             Register last_java_fp,
1641 1645                             address last_java_pc);
1642 1646  
1643 1647    void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
1644 1648  
1645 1649    // thread in the default location (r15_thread on 64bit)
1646 1650    void reset_last_Java_frame(bool clear_fp, bool clear_pc);
1647 1651  
1648 1652    // Stores
1649 1653    void store_check(Register obj);                // store check for obj - register is destroyed afterwards
1650 1654    void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
1651 1655  
1652 1656    void g1_write_barrier_pre(Register obj,
1653 1657  #ifndef _LP64
1654 1658                              Register thread,
1655 1659  #endif
1656 1660                              Register tmp,
1657 1661                              Register tmp2,
1658 1662                              bool     tosca_live);
1659 1663    void g1_write_barrier_post(Register store_addr,
1660 1664                               Register new_val,
1661 1665  #ifndef _LP64
1662 1666                               Register thread,
1663 1667  #endif
1664 1668                               Register tmp,
1665 1669                               Register tmp2);
1666 1670  
1667 1671  
1668 1672    // split store_check(Register obj) to enhance instruction interleaving
1669 1673    void store_check_part_1(Register obj);
1670 1674    void store_check_part_2(Register obj);
1671 1675  
1672 1676    // C 'boolean' to Java boolean: x == 0 ? 0 : 1
1673 1677    void c2bool(Register x);
1674 1678  
1675 1679    // C++ bool manipulation
1676 1680  
1677 1681    void movbool(Register dst, Address src);
1678 1682    void movbool(Address dst, bool boolconst);
1679 1683    void movbool(Address dst, Register src);
1680 1684    void testbool(Register dst);
1681 1685  
1682 1686    // oop manipulations
1683 1687    void load_klass(Register dst, Register src);
1684 1688    void store_klass(Register dst, Register src);
1685 1689  
1686 1690    void load_heap_oop(Register dst, Address src);
1687 1691    void store_heap_oop(Address dst, Register src);
1688 1692  
1689 1693    // Used for storing NULL. All other oop constants should be
1690 1694    // stored using routines that take a jobject.
1691 1695    void store_heap_oop_null(Address dst);
1692 1696  
1693 1697    void load_prototype_header(Register dst, Register src);
1694 1698  
1695 1699  #ifdef _LP64
1696 1700    void store_klass_gap(Register dst, Register src);
1697 1701  
1698 1702    // This dummy is to prevent a call to store_heap_oop from
1699 1703    // converting a zero (like NULL) into a Register by giving
1700 1704    // the compiler two choices it can't resolve
1701 1705  
1702 1706    void store_heap_oop(Address dst, void* dummy);
1703 1707  
1704 1708    void encode_heap_oop(Register r);
1705 1709    void decode_heap_oop(Register r);
1706 1710    void encode_heap_oop_not_null(Register r);
1707 1711    void decode_heap_oop_not_null(Register r);
1708 1712    void encode_heap_oop_not_null(Register dst, Register src);
1709 1713    void decode_heap_oop_not_null(Register dst, Register src);
1710 1714  
1711 1715    void set_narrow_oop(Register dst, jobject obj);
1712 1716    void set_narrow_oop(Address dst, jobject obj);
1713 1717    void cmp_narrow_oop(Register dst, jobject obj);
1714 1718    void cmp_narrow_oop(Address dst, jobject obj);
1715 1719  
1716 1720    // if heap base register is used - reinit it with the correct value
1717 1721    void reinit_heapbase();
1718 1722  
1719 1723    DEBUG_ONLY(void verify_heapbase(const char* msg);)
1720 1724  
1721 1725  #endif // _LP64
1722 1726  
1723 1727    // Int division/remainder for Java
1724 1728    // (as idivl, but checks for special case as described in JVM spec.)
1725 1729    // returns idivl instruction offset for implicit exception handling
1726 1730    int corrected_idivl(Register reg);
1727 1731  
1728 1732    // Long division/remainder for Java
1729 1733    // (as idivq, but checks for special case as described in JVM spec.)
1730 1734    // returns idivq instruction offset for implicit exception handling
1731 1735    int corrected_idivq(Register reg);
1732 1736  
1733 1737    void int3();
1734 1738  
1735 1739    // Long operation macros for a 32bit cpu
1736 1740    // Long negation for Java
1737 1741    void lneg(Register hi, Register lo);
1738 1742  
1739 1743    // Long multiplication for Java
1740 1744    // (destroys contents of eax, ebx, ecx and edx)
1741 1745    void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
1742 1746  
1743 1747    // Long shifts for Java
1744 1748    // (semantics as described in JVM spec.)
1745 1749    void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
1746 1750    void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
1747 1751  
1748 1752    // Long compare for Java
1749 1753    // (semantics as described in JVM spec.)
1750 1754    void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
1751 1755  
1752 1756  
1753 1757    // misc
1754 1758  
1755 1759    // Sign extension
1756 1760    void sign_extend_short(Register reg);
1757 1761    void sign_extend_byte(Register reg);
1758 1762  
1759 1763    // Division by power of 2, rounding towards 0
1760 1764    void division_with_shift(Register reg, int shift_value);
1761 1765  
1762 1766    // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
1763 1767    //
1764 1768    // CF (corresponds to C0) if x < y
1765 1769    // PF (corresponds to C2) if unordered
1766 1770    // ZF (corresponds to C3) if x = y
1767 1771    //
1768 1772    // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
1769 1773    // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
1770 1774    void fcmp(Register tmp);
1771 1775    // Variant of the above which allows y to be further down the stack
1772 1776    // and which only pops x and y if specified. If pop_right is
1773 1777    // specified then pop_left must also be specified.
1774 1778    void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
1775 1779  
1776 1780    // Floating-point comparison for Java
1777 1781    // Compares the top-most stack entries on the FPU stack and stores the result in dst.
1778 1782    // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
1779 1783    // (semantics as described in JVM spec.)
1780 1784    void fcmp2int(Register dst, bool unordered_is_less);
1781 1785    // Variant of the above which allows y to be further down the stack
1782 1786    // and which only pops x and y if specified. If pop_right is
1783 1787    // specified then pop_left must also be specified.
1784 1788    void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
1785 1789  
1786 1790    // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
1787 1791    // tmp is a temporary register, if none is available use noreg
1788 1792    void fremr(Register tmp);
1789 1793  
1790 1794  
1791 1795    // same as fcmp2int, but using SSE2
1792 1796    void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
1793 1797    void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
1794 1798  
1795 1799    // Inlined sin/cos generator for Java; must not use CPU instruction
1796 1800    // directly on Intel as it does not have high enough precision
1797 1801    // outside of the range [-pi/4, pi/4]. Extra argument indicates the
1798 1802    // number of FPU stack slots in use; all but the topmost will
1799 1803    // require saving if a slow case is necessary. Assumes argument is
1800 1804    // on FP TOS; result is on FP TOS.  No cpu registers are changed by
1801 1805    // this code.
1802 1806    void trigfunc(char trig, int num_fpu_regs_in_use = 1);
1803 1807  
1804 1808    // branch to L if FPU flag C2 is set/not set
1805 1809    // tmp is a temporary register, if none is available use noreg
1806 1810    void jC2 (Register tmp, Label& L);
1807 1811    void jnC2(Register tmp, Label& L);
1808 1812  
1809 1813    // Pop ST (ffree & fincstp combined)
1810 1814    void fpop();
1811 1815  
1812 1816    // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
1813 1817    void push_fTOS();
1814 1818  
1815 1819    // pops double TOS element from CPU stack and pushes on FPU stack
1816 1820    void pop_fTOS();
1817 1821  
1818 1822    void empty_FPU_stack();
1819 1823  
1820 1824    void push_IU_state();
1821 1825    void pop_IU_state();
1822 1826  
1823 1827    void push_FPU_state();
1824 1828    void pop_FPU_state();
1825 1829  
1826 1830    void push_CPU_state();
1827 1831    void pop_CPU_state();
1828 1832  
1829 1833    // Round up to a power of two
1830 1834    void round_to(Register reg, int modulus);
1831 1835  
1832 1836    // Callee saved registers handling
1833 1837    void push_callee_saved_registers();
1834 1838    void pop_callee_saved_registers();
1835 1839  
1836 1840    // allocation
1837 1841    void eden_allocate(
1838 1842      Register obj,                      // result: pointer to object after successful allocation
1839 1843      Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
1840 1844      int      con_size_in_bytes,        // object size in bytes if   known at compile time
1841 1845      Register t1,                       // temp register
1842 1846      Label&   slow_case                 // continuation point if fast allocation fails
1843 1847    );
1844 1848    void tlab_allocate(
1845 1849      Register obj,                      // result: pointer to object after successful allocation
1846 1850      Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
1847 1851      int      con_size_in_bytes,        // object size in bytes if   known at compile time
1848 1852      Register t1,                       // temp register
1849 1853      Register t2,                       // temp register
1850 1854      Label&   slow_case                 // continuation point if fast allocation fails
1851 1855    );
1852 1856    void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
1853 1857  
1854 1858    // interface method calling
1855 1859    void lookup_interface_method(Register recv_klass,
1856 1860                                 Register intf_klass,
1857 1861                                 RegisterOrConstant itable_index,
1858 1862                                 Register method_result,
1859 1863                                 Register scan_temp,
1860 1864                                 Label& no_such_interface);
1861 1865  
1862 1866    // Test sub_klass against super_klass, with fast and slow paths.
1863 1867  
1864 1868    // The fast path produces a tri-state answer: yes / no / maybe-slow.
1865 1869    // One of the three labels can be NULL, meaning take the fall-through.
1866 1870    // If super_check_offset is -1, the value is loaded up from super_klass.
1867 1871    // No registers are killed, except temp_reg.
1868 1872    void check_klass_subtype_fast_path(Register sub_klass,
1869 1873                                       Register super_klass,
1870 1874                                       Register temp_reg,
1871 1875                                       Label* L_success,
1872 1876                                       Label* L_failure,
1873 1877                                       Label* L_slow_path,
1874 1878                  RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
1875 1879  
1876 1880    // The rest of the type check; must be wired to a corresponding fast path.
1877 1881    // It does not repeat the fast path logic, so don't use it standalone.
1878 1882    // The temp_reg and temp2_reg can be noreg, if no temps are available.
1879 1883    // Updates the sub's secondary super cache as necessary.
1880 1884    // If set_cond_codes, condition codes will be Z on success, NZ on failure.
1881 1885    void check_klass_subtype_slow_path(Register sub_klass,
1882 1886                                       Register super_klass,
1883 1887                                       Register temp_reg,
1884 1888                                       Register temp2_reg,
1885 1889                                       Label* L_success,
1886 1890                                       Label* L_failure,
1887 1891                                       bool set_cond_codes = false);
1888 1892  
1889 1893    // Simplified, combined version, good for typical uses.
1890 1894    // Falls through on failure.
1891 1895    void check_klass_subtype(Register sub_klass,
1892 1896                             Register super_klass,
1893 1897                             Register temp_reg,
1894 1898                             Label& L_success);
1895 1899  
1896 1900    // method handles (JSR 292)
1897 1901    void check_method_handle_type(Register mtype_reg, Register mh_reg,
1898 1902                                  Register temp_reg,
1899 1903                                  Label& wrong_method_type);
1900 1904    void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
1901 1905                                    Register temp_reg);
1902 1906    void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
1903 1907    Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
1904 1908  
1905 1909  
1906 1910    //----
1907 1911    void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
1908 1912  
1909 1913    // Debugging
1910 1914  
1911 1915    // only if +VerifyOops
1912 1916    void verify_oop(Register reg, const char* s = "broken oop");
1913 1917    void verify_oop_addr(Address addr, const char * s = "broken oop addr");
1914 1918  
1915 1919    // only if +VerifyFPU
1916 1920    void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
1917 1921  
1918 1922    // prints msg, dumps registers and stops execution
1919 1923    void stop(const char* msg);
1920 1924  
1921 1925    // prints msg and continues
1922 1926    void warn(const char* msg);
1923 1927  
1924 1928    static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
1925 1929    static void debug64(char* msg, int64_t pc, int64_t regs[]);
1926 1930  
1927 1931    void os_breakpoint();
1928 1932  
1929 1933    void untested()                                { stop("untested"); }
1930 1934  
1931 1935    void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); } // NOTE(review): b is not freed in this function; presumably stop() needs the message to stay alive - confirm
1932 1936  
1933 1937    void should_not_reach_here()                   { stop("should not reach here"); }
1934 1938  
1935 1939    void print_CPU_state();
1936 1940  
1937 1941    // Stack overflow checking
1938 1942    void bang_stack_with_offset(int offset) {
1939 1943      // stack grows down: caller passes a positive offset, which is negated so the store goes to rsp - offset
1940 1944      assert(offset > 0, "must bang with negative offset");
1941 1945      movl(Address(rsp, (-offset)), rax);
1942 1946    }
1943 1947  
1944 1948    // Writes to stack successive pages until offset reached to check for
1945 1949    // stack overflow + shadow pages.  Also, clobbers tmp
1946 1950    void bang_stack_size(Register size, Register tmp);
1947 1951  
1948 1952    virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
1949 1953                                                  Register tmp,
1950 1954                                                  int offset);
1951 1955  
1952 1956    // Support for serializing memory accesses between threads
1953 1957    void serialize_memory(Register thread, Register tmp);
1954 1958  
1955 1959    void verify_tlab();
1956 1960  
1957 1961    // Biased locking support
1958 1962    // lock_reg and obj_reg must be loaded up with the appropriate values.
1959 1963    // swap_reg must be rax, and is killed.
1960 1964    // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
1961 1965    // be killed; if not supplied, push/pop will be used internally to
1962 1966    // allocate a temporary (inefficient, avoid if possible).
1963 1967    // Optional slow case is for implementations (interpreter and C1) which branch to
1964 1968    // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
1965 1969    // Returns offset of first potentially-faulting instruction for null
1966 1970    // check info (currently consumed only by C1). If
1967 1971    // swap_reg_contains_mark is true then returns -1 as it is assumed
1968 1972    // the calling code has already passed any potential faults.
1969 1973    int biased_locking_enter(Register lock_reg, Register obj_reg,
1970 1974                             Register swap_reg, Register tmp_reg,
1971 1975                             bool swap_reg_contains_mark,
1972 1976                             Label& done, Label* slow_case = NULL,
1973 1977                             BiasedLockingCounters* counters = NULL);
1974 1978    void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
1975 1979  
1976 1980  
1977 1981    Condition negate_condition(Condition cond);
1978 1982  
1979 1983    // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
1980 1984    // operands. In general the names are modified to avoid hiding the instruction in Assembler
1981 1985    // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
1982 1986    // here in MacroAssembler. The major exception to this rule is call
1983 1987  
1984 1988    // Arithmetics
1985 1989  
1986 1990  
1987 1991    void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
1988 1992    void addptr(Address dst, Register src);
1989 1993  
1990 1994    void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
1991 1995    void addptr(Register dst, int32_t src);
1992 1996    void addptr(Register dst, Register src);
1993 1997  
1994 1998    void andptr(Register dst, int32_t src);
1995 1999    void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
1996 2000  
1997 2001    void cmp8(AddressLiteral src1, int imm);
1998 2002  
1999 2003    // renamed to drag out the casting of address to int32_t/intptr_t
2000 2004    void cmp32(Register src1, int32_t imm);
2001 2005  
2002 2006    void cmp32(AddressLiteral src1, int32_t imm);
2003 2007    // compare reg - mem, or reg - &mem
2004 2008    void cmp32(Register src1, AddressLiteral src2);
2005 2009  
2006 2010    void cmp32(Register src1, Address src2);
2007 2011  
2008 2012  #ifndef _LP64
2009 2013    void cmpoop(Address dst, jobject obj);
2010 2014    void cmpoop(Register dst, jobject obj);
2011 2015  #endif // _LP64
2012 2016  
2013 2017    // NOTE src2 must be the lval. This is NOT a mem-mem compare
2014 2018    void cmpptr(Address src1, AddressLiteral src2);
2015 2019  
2016 2020    void cmpptr(Register src1, AddressLiteral src2);
2017 2021  
2018 2022    void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2019 2023    void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2020 2024    // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2021 2025  
2022 2026    void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2023 2027    void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2024 2028  
2025 2029    // cmp64 to avoid hiding cmpq
2026 2030    void cmp64(Register src1, AddressLiteral src);
2027 2031  
2028 2032    void cmpxchgptr(Register reg, Address adr);
2029 2033  
2030 2034    void locked_cmpxchgptr(Register reg, AddressLiteral adr);
2031 2035  
2032 2036  
2033 2037    void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
2034 2038  
2035 2039  
2036 2040    void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
2037 2041  
2038 2042    void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
2039 2043  
2040 2044    void shlptr(Register dst, int32_t shift);
2041 2045    void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
2042 2046  
2043 2047    void shrptr(Register dst, int32_t shift);
2044 2048    void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
2045 2049  
2046 2050    void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
2047 2051    void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
2048 2052  
2049 2053    void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
2050 2054  
2051 2055    void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
2052 2056    void subptr(Register dst, int32_t src);
2053 2057    void subptr(Register dst, Register src);
2054 2058  
2055 2059  
2056 2060    void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
2057 2061    void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
2058 2062  
2059 2063    void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
2060 2064    void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
2061 2065  
2062 2066    void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
2063 2067  
2064 2068  
2065 2069  
2066 2070    // Helper functions for statistics gathering.
2067 2071    // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
2068 2072    void cond_inc32(Condition cond, AddressLiteral counter_addr);
2069 2073    // Unconditional atomic increment.
2070 2074    void atomic_incl(AddressLiteral counter_addr);
2071 2075  
2072 2076    void lea(Register dst, AddressLiteral adr);
2073 2077    void lea(Address dst, AddressLiteral adr);
2074 2078    void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
2075 2079  
2076 2080    void leal32(Register dst, Address src) { leal(dst, src); }
2077 2081  
2078 2082    void test32(Register src1, AddressLiteral src2);
2079 2083  
2080 2084    void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2081 2085    void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2082 2086    void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2083 2087  
2084 2088    void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
2085 2089    void testptr(Register src1, Register src2);
2086 2090  
2087 2091    void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
2088 2092    void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
2089 2093  
2090 2094    // Calls
2091 2095  
2092 2096    void call(Label& L, relocInfo::relocType rtype);
2093 2097    void call(Register entry);
2094 2098  
2095 2099    // NOTE: this call transfers to the effective address of entry NOT
2096 2100    // the address contained by entry. This is because this is more natural
2097 2101    // for jumps/calls.
2098 2102    void call(AddressLiteral entry);
2099 2103  
2100 2104    // Jumps
2101 2105  
2102 2106    // NOTE: these jumps transfer to the effective address of dst NOT
2103 2107    // the address contained by dst. This is because this is more natural
2104 2108    // for jumps/calls.
2105 2109    void jump(AddressLiteral dst);
2106 2110    void jump_cc(Condition cc, AddressLiteral dst);
2107 2111  
2108 2112    // 32bit can do a case table jump in one instruction but we no longer allow the base
2109 2113    // to be installed in the Address class. This jump will transfer to the address
2110 2114    // contained in the location described by entry (not the address of entry)
2111 2115    void jump(ArrayAddress entry);
2112 2116  
2113 2117    // Floating

↓ open down ↓

752 lines elided

↑ open up ↑

2114 2118  
2115 2119    void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
2116 2120    void andpd(XMMRegister dst, AddressLiteral src);
2117 2121  
2118 2122    void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
2119 2123    void comiss(XMMRegister dst, AddressLiteral src);
2120 2124  
2121 2125    void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
2122 2126    void comisd(XMMRegister dst, AddressLiteral src);
2123 2127  
     2128 +  void fadd_s(Address src)        { Assembler::fadd_s(src); }
     2129 +  void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
     2130 +
2124 2131    void fldcw(Address src) { Assembler::fldcw(src); }
2125 2132    void fldcw(AddressLiteral src);
2126 2133  
2127 2134    void fld_s(int index)   { Assembler::fld_s(index); }
2128 2135    void fld_s(Address src) { Assembler::fld_s(src); }
2129 2136    void fld_s(AddressLiteral src);
2130 2137  
2131 2138    void fld_d(Address src) { Assembler::fld_d(src); }
2132 2139    void fld_d(AddressLiteral src);
2133 2140  
2134 2141    void fld_x(Address src) { Assembler::fld_x(src); }
2135 2142    void fld_x(AddressLiteral src);
2136 2143  
     2144 +  void fmul_s(Address src)        { Assembler::fmul_s(src); }
     2145 +  void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
     2146 +
2137 2147    void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
2138 2148    void ldmxcsr(AddressLiteral src);
2139 2149  
2140 2150  private:
2141 2151    // these are private because users should be doing movflt/movdbl
2142 2152  
2143 2153    void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
2144 2154    void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
2145 2155    void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
2146 2156    void movss(XMMRegister dst, AddressLiteral src);
2147 2157  
2148 2158    void movlpd(XMMRegister dst, Address src)      {Assembler::movlpd(dst, src); }
2149 2159    void movlpd(XMMRegister dst, AddressLiteral src);
2150 2160  
2151 2161  public:
2152 2162  
2153      -  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
2154      -  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
2155      -  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
2156      -  void movsd(XMMRegister dst, AddressLiteral src);
     2163 +  void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
     2164 +  void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
     2165 +  void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
     2166 +
     2167 +  void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
     2168 +  void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
     2169 +  void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
     2170 +
     2171 +  void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
     2172 +  void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
     2173 +  void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
     2174 +
     2175 +  void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
     2176 +  void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
     2177 +  void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
     2178 +
     2179 +  void movsd(XMMRegister dst, XMMRegister src)    { Assembler::movsd(dst, src); }
     2180 +  void movsd(Address dst, XMMRegister src)        { Assembler::movsd(dst, src); }
     2181 +  void movsd(XMMRegister dst, Address src)        { Assembler::movsd(dst, src); }
     2182 +  void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
     2183 +
     2184 +  void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
     2185 +  void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
     2186 +  void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
     2187 +
     2188 +  void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
     2189 +  void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
     2190 +  void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
     2191 +
     2192 +  void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
     2193 +  void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
     2194 +  void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
     2195 +
     2196 +  void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
     2197 +  void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
     2198 +  void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
     2199 +
     2200 +  void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
     2201 +  void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
     2202 +  void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
     2203 +
     2204 +  void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
     2205 +  void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
     2206 +  void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
2157 2207  
2158 2208    void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
2159 2209    void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
2160 2210    void ucomiss(XMMRegister dst, AddressLiteral src);
2161 2211  
2162 2212    void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
2163 2213    void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
2164 2214    void ucomisd(XMMRegister dst, AddressLiteral src);
2165 2215  
2166 2216    // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values

2167 2217    void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
2168 2218    void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
2169 2219    void xorpd(XMMRegister dst, AddressLiteral src);
2170 2220  
2171 2221    // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
2172 2222    void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
2173 2223    void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
2174 2224    void xorps(XMMRegister dst, AddressLiteral src);
2175 2225  
2176 2226    // Data
2177 2227  
2178 2228    void cmov(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
2179 2229  
2180 2230    void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
2181 2231    void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }
2182 2232  
2183 2233    void movoop(Register dst, jobject obj);
2184 2234    void movoop(Address dst, jobject obj);
2185 2235  
2186 2236    void movptr(ArrayAddress dst, Register src);
2187 2237    // can this do an lea?
2188 2238    void movptr(Register dst, ArrayAddress src);
2189 2239  
2190 2240    void movptr(Register dst, Address src);
2191 2241  
2192 2242    void movptr(Register dst, AddressLiteral src);
2193 2243  
2194 2244    void movptr(Register dst, intptr_t src);
2195 2245    void movptr(Register dst, Register src);
2196 2246    void movptr(Address dst, intptr_t src);
2197 2247  
2198 2248    void movptr(Address dst, Register src);
2199 2249  
2200 2250  #ifdef _LP64
2201 2251    // Generally the next two are only used for moving NULL
2202 2252    // Although there are situations in initializing the mark word where
2203 2253    // they could be used. They are dangerous.
2204 2254  
2205 2255    // They only exist on LP64, where int32_t and intptr_t are distinct types;
2206 2256    // otherwise the two would be the same and the declarations would conflict.
2207 2257  
2208 2258    void movptr(Address dst, int32_t imm32);
2209 2259    void movptr(Register dst, int32_t imm32);
2210 2260  #endif // _LP64
2211 2261  
2212 2262    // to avoid hiding movl
2213 2263    void mov32(AddressLiteral dst, Register src);
2214 2264    void mov32(Register dst, AddressLiteral src);
2215 2265  
2216 2266    // to avoid hiding movb
2217 2267    void movbyte(ArrayAddress dst, int src);
2218 2268  
2219 2269    // Can push value or effective address
2220 2270    void pushptr(AddressLiteral src);
2221 2271  
2222 2272    void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
2223 2273    void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
2224 2274  
2225 2275    void pushoop(jobject obj);
2226 2276  
2227 2277    // sign extend, as needed, an l (32-bit) value to a ptr sized element
2228 2278    void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
2229 2279    void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
2230 2280  
2231 2281    // IndexOf strings.
2232 2282    void string_indexof(Register str1, Register str2,
2233 2283                        Register cnt1, Register cnt2, Register result,
2234 2284                        XMMRegister vec, Register tmp);
2235 2285  
2236 2286    // Compare strings.
2237 2287    void string_compare(Register str1, Register str2,
2238 2288                        Register cnt1, Register cnt2, Register result,
2239 2289                        XMMRegister vec1, XMMRegister vec2);
2240 2290  
2241 2291    // Compare char[] arrays.
2242 2292    void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
2243 2293                            Register limit, Register result, Register chr,
2244 2294                            XMMRegister vec1, XMMRegister vec2);
2245 2295  
2246 2296    // Fill primitive arrays
2247 2297    void generate_fill(BasicType t, bool aligned,
2248 2298                       Register to, Register value, Register count,
2249 2299                       Register rtmp, XMMRegister xtmp);
2250 2300  
2251 2301  #undef VIRTUAL
2252 2302  
2253 2303  };
2254 2304  
2255 2305  /**
2256 2306   * class SkipIfEqual:
2257 2307   *
2258 2308   * Instantiating this class will result in assembly code being output that will
2259 2309   * jump around any code emitted between the creation of the instance and its
2260 2310   * automatic destruction at the end of a scope block, depending on the value of
2261 2311   * the flag passed to the constructor, which will be checked at run-time.
2262 2312   */
2263 2313  class SkipIfEqual {
2264 2314   private:
2265 2315    MacroAssembler* _masm;
2266 2316    Label _label;
2267 2317  
2268 2318   public:
2269 2319     SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
2270 2320     ~SkipIfEqual();
2271 2321  };
2272 2322  
2273 2323  #ifdef ASSERT
2274 2324  inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }
2275 2325  #endif

↓ open down ↓

109 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX