7009266_intrinsify_get Wdiff src/cpu/x86/vm/assembler_x86.hpp

Print this page

rev 2161 : [mq]: initial-intrinsification-changes
rev 2162 : [mq]: code-review-comments-vladimir

Split	Close
Expand all
Collapse all

          --- old/src/cpu/x86/vm/assembler_x86.hpp
          +++ new/src/cpu/x86/vm/assembler_x86.hpp

   1    1  /*
   2    2   * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3    3   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4    4   *
   5    5   * This code is free software; you can redistribute it and/or modify it
   6    6   * under the terms of the GNU General Public License version 2 only, as
   7    7   * published by the Free Software Foundation.
   8    8   *
   9    9   * This code is distributed in the hope that it will be useful, but WITHOUT
  10   10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11   11   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12   12   * version 2 for more details (a copy is included in the LICENSE file that
  13   13   * accompanied this code).
  14   14   *
  15   15   * You should have received a copy of the GNU General Public License version
  16   16   * 2 along with this work; if not, write to the Free Software Foundation,
  17   17   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18   18   *
  19   19   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20   20   * or visit www.oracle.com if you need additional information or have any
  21   21   * questions.
  22   22   *
  23   23   */
  24   24  
  25   25  #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
  26   26  #define CPU_X86_VM_ASSEMBLER_X86_HPP
  27   27  
  28   28  class BiasedLockingCounters;
  29   29  
  30   30  // Contains all the definitions needed for x86 assembly code generation.
  31   31  
  32   32  // Calling convention
  33   33  class Argument VALUE_OBJ_CLASS_SPEC {
           // Compile-time counts of the registers used for argument passing.
           // With _LP64 defined there are separate counts for the C convention (the
           // *_c enumerators, which differ between _WIN64 and other platforms) and
           // for the Java convention (the *_j enumerators). Without _LP64 the only
           // enumerator is n_register_parameters, which is 0.
  34   34   public:
  35   35    enum {
  36   36  #ifdef _LP64
  37   37  #ifdef _WIN64
  38   38      n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
  39   39      n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
  40   40  #else
  41   41      n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
  42   42      n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
  43   43  #endif // _WIN64
  44   44      n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
  45   45      n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
  46   46  #else
  47   47      n_register_parameters = 0   // 0 registers used to pass arguments
  48   48  #endif // _LP64
  49   49    };
  50   50  };
  51   51  
  52   52  
  53   53  #ifdef _LP64
  54   54  // Symbolically name the register arguments used by the c calling convention.
  55   55  // Windows is different from linux/solaris. So much for standards...
           //
           // c_rargN names the N-th integer argument register and c_fargN the N-th
           // floating-point (XMM) argument register of the C convention.
  56   56  
  57   57  #ifdef _WIN64
  58   58  
           // Win64: four integer argument registers ...
  59   59  REGISTER_DECLARATION(Register, c_rarg0, rcx);
  60   60  REGISTER_DECLARATION(Register, c_rarg1, rdx);
  61   61  REGISTER_DECLARATION(Register, c_rarg2, r8);
  62   62  REGISTER_DECLARATION(Register, c_rarg3, r9);
  63   63  
           // ... and four XMM argument registers.
  64   64  REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
  65   65  REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
  66   66  REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
  67   67  REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
  68   68  
  69   69  #else
  70   70  
           // Non-Windows 64-bit: six integer argument registers ...
  71   71  REGISTER_DECLARATION(Register, c_rarg0, rdi);
  72   72  REGISTER_DECLARATION(Register, c_rarg1, rsi);
  73   73  REGISTER_DECLARATION(Register, c_rarg2, rdx);
  74   74  REGISTER_DECLARATION(Register, c_rarg3, rcx);
  75   75  REGISTER_DECLARATION(Register, c_rarg4, r8);
  76   76  REGISTER_DECLARATION(Register, c_rarg5, r9);
  77   77  
           // ... and eight XMM argument registers.
  78   78  REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
  79   79  REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
  80   80  REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
  81   81  REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
  82   82  REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
  83   83  REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
  84   84  REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
  85   85  REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
  86   86  
  87   87  #endif // _WIN64
  88   88  
  89   89  // Symbolically name the register arguments used by the Java calling convention.
  90   90  // We have control over the convention for java so we can do what we please.
  91   91  // What pleases us is to offset the java calling convention so that when
  92   92  // we call a suitable jni method the arguments are lined up and we don't
  93   93  // have to do much shuffling. A suitable jni method is non-static and has a
  94   94  // small number of arguments (two fewer args on windows)
  95   95  //
  96   96  //        |-------------------------------------------------------|
  97   97  //        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
  98   98  //        |-------------------------------------------------------|
  99   99  //        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
 100  100  //        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
 101  101  //        |-------------------------------------------------------|
 102  102  //        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
 103  103  //        |-------------------------------------------------------|
           //
           // Read the table column-wise: j_rarg0..j_rarg2 are c_rarg1..c_rarg3 and
           // j_rarg5 is c_rarg0, i.e. the Java registers are the C registers shifted
           // by one position.
 104  104  
 105  105  REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
 106  106  REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
 107  107  REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
 108  108  // Windows runs out of register args here
           // (there is no c_rarg4/c_rarg5 on Win64, so rdi and rsi are named directly).
 109  109  #ifdef _WIN64
 110  110  REGISTER_DECLARATION(Register, j_rarg3, rdi);
 111  111  REGISTER_DECLARATION(Register, j_rarg4, rsi);
 112  112  #else
 113  113  REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
 114  114  REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
 115  115  #endif /* _WIN64 */
 116  116  REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
 117  117  
           // Java floating-point arguments use xmm0 - xmm7 on every 64-bit platform.
 118  118  REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
 119  119  REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
 120  120  REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
 121  121  REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
 122  122  REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
 123  123  REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
 124  124  REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
 125  125  REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
 126  126  
 127  127  REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile; names r10 under _LP64 (noreg otherwise)
 128  128  REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile; names r11 under _LP64 (noreg otherwise)
 129  129  
           // Names for r12 and r15. NOTE(review): the names suggest r12 holds the heap
           // base and r15 the current thread -- confirm against the code that uses them.
 130  130  REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
 131  131  REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
 132  132  
 133  133  #else
 134  134  // rscratch1 will appear in 32bit code that is dead but of course must compile.
 135  135  // Using noreg ensures that if the dead code is incorrectly live and executed it
 136  136  // will cause an assertion failure. The same is done for rscratch2.
 137  137  #define rscratch1 noreg
 138  138  #define rscratch2 noreg
 139  139  
 140  140  #endif // _LP64
 141  141  
 142  142  // JSR 292 fixed register usages:
           // rbp_mh_SP_save is a name for rbp, declared for both 32-bit and 64-bit builds.
           // NOTE(review): presumably rbp is where SP is saved around method-handle
           // calls, as the name suggests -- confirm against its users.
 143  143  REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
 144  144  
 145  145  // Address is an abstraction used to represent a memory location
 146  146  // using any of the amd64 addressing modes with one object.
 147  147  //
 148  148  // Note: A register location is represented via a Register, not
 149  149  //       via an address for efficiency & simplicity reasons.
 150  150  
 151  151  class ArrayAddress;
 152  152  
 153  153  class Address VALUE_OBJ_CLASS_SPEC {
           // Value type describing a memory operand as a base register, an index
           // register, a scale factor and an integer displacement, together with a
           // RelocationHolder (_rspec). Assembler, MacroAssembler and LIR_Assembler
           // are friends and may touch the private parts.
 154  154   public:
             // Scale applied to the index register. The enumerator value is the log2
             // of the multiplier (see scale_size()); no_scale marks "no index".
             // times_ptr is times_8 or times_4 depending on LP64_ONLY/NOT_LP64.
 155  155    enum ScaleFactor {
 156  156      no_scale = -1,
 157  157      times_1  =  0,
 158  158      times_2  =  1,
 159  159      times_4  =  2,
 160  160      times_8  =  3,
 161  161      times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
 162  162    };
             // Maps a size of 1, 2, 4 or 8 to the matching ScaleFactor. Any other
             // size trips the assert.
 163  163    static ScaleFactor times(int size) {
 164  164      assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
 165  165      if (size == 8)  return times_8;
 166  166      if (size == 4)  return times_4;
 167  167      if (size == 2)  return times_2;
 168  168      return times_1;
 169  169    }
             // Inverse of times(): returns the multiplier (1 << scale) for a scale
             // factor. no_scale is rejected by the first assert; the second assert
             // only sanity-checks the enumerator encoding relied on by the shift.
 170  170    static int scale_size(ScaleFactor scale) {
 171  171      assert(scale != no_scale, "");
 172  172      assert(((1 << (int)times_1) == 1 &&
 173  173              (1 << (int)times_2) == 2 &&
 174  174              (1 << (int)times_4) == 4 &&
 175  175              (1 << (int)times_8) == 8), "");
 176  176      return (1 << (int)scale);
 177  177    }
 178  178  
 179  179   private:
 180  180    Register         _base;
 181  181    Register         _index;
 182  182    ScaleFactor      _scale;
 183  183    int              _disp;
 184  184    RelocationHolder _rspec;
 185  185  
 186  186    // Easily misused constructors make them private
 187  187    // %%% can we make these go away?
 188  188    NOT_LP64(Address(address loc, RelocationHolder spec);)
 189  189    Address(int disp, address loc, relocInfo::relocType rtype);
 190  190    Address(int disp, address loc, RelocationHolder spec);
 191  191  
 192  192   public:
 193  193  
            // NOTE(review): non-const duplicate of the const disp() accessor declared
            // in the "accessors" section below; it looks redundant -- confirm whether
            // any caller needs it before removing.
 194  194   int disp() { return _disp; }
 195  195    // creation
             // Empty address: no base, no index, no scale, zero displacement.
 196  196    Address()
 197  197      : _base(noreg),
 198  198        _index(noreg),
 199  199        _scale(no_scale),
 200  200        _disp(0) {
 201  201    }
 202  202  
 203  203    // No default displacement otherwise Register can be implicitly
 204  204    // converted to 0(Register) which is quite a different animal.
 205  205  
             // base + disp, without an index register.
 206  206    Address(Register base, int disp)
 207  207      : _base(base),
 208  208        _index(noreg),
 209  209        _scale(no_scale),
 210  210        _disp(disp) {
 211  211    }
 212  212  
             // base + index * scale + disp. The assert requires that the index is
             // invalid exactly when scale == no_scale.
 213  213    Address(Register base, Register index, ScaleFactor scale, int disp = 0)
 214  214      : _base (base),
 215  215        _index(index),
 216  216        _scale(scale),
 217  217        _disp (disp) {
 218  218      assert(!index->is_valid() == (scale == Address::no_scale),
 219  219             "inconsistent address");
 220  220    }
 221  221  
             // Variant whose index is either a register or a constant. A constant
             // index is folded into the displacement as constant * scale_size(scale).
             // scale_size() is called unconditionally, so scale must not be no_scale.
             // NOTE(review): _scale is initialized from the caller's scale before the
             // parameter is reset to no_scale below, so for a constant index _scale
             // keeps the caller's value; the reset only feeds the assert.
 222  222    Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
 223  223      : _base (base),
 224  224        _index(index.register_or_noreg()),
 225  225        _scale(scale),
 226  226        _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
 227  227      if (!index.is_register())  scale = Address::no_scale;
 228  228      assert(!_index->is_valid() == (scale == Address::no_scale),
 229  229             "inconsistent address");
 230  230    }
 231  231  
             // Returns a copy of this address with disp added to its displacement;
             // *this is left unchanged.
 232  232    Address plus_disp(int disp) const {
 233  233      Address a = (*this);
 234  234      a._disp += disp;
 235  235      return a;
 236  236    }
 237  237  
 238  238    // The following two overloads are used in connection with the
 239  239    // ByteSize type (see sizes.hpp).  They simplify the use of
 240  240    // ByteSize'd arguments in assembly code. Note that their equivalent
 241  241    // for the optimized build are the member functions with int disp
 242  242    // argument since ByteSize is mapped to an int type in that case.
 243  243    //
 244  244    // Note: DO NOT introduce similar overloaded functions for WordSize
 245  245    // arguments as in the optimized mode, both ByteSize and WordSize
 246  246    // are mapped to the same type and thus the compiler cannot make a
 247  247    // distinction anymore (=> compiler errors).
 248  248  
 249  249  #ifdef ASSERT
 250  250    Address(Register base, ByteSize disp)
 251  251      : _base(base),
 252  252        _index(noreg),
 253  253        _scale(no_scale),
 254  254        _disp(in_bytes(disp)) {
 255  255    }
 256  256  
 257  257    Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
 258  258      : _base(base),
 259  259        _index(index),
 260  260        _scale(scale),
 261  261        _disp(in_bytes(disp)) {
 262  262      assert(!index->is_valid() == (scale == Address::no_scale),
 263  263             "inconsistent address");
 264  264    }
 265  265  
             // Same folding of a constant index (and same _scale caveat) as the
             // int-displacement RegisterOrConstant constructor above.
 266  266    Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
 267  267      : _base (base),
 268  268        _index(index.register_or_noreg()),
 269  269        _scale(scale),
 270  270        _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
 271  271      if (!index.is_register())  scale = Address::no_scale;
 272  272      assert(!_index->is_valid() == (scale == Address::no_scale),
 273  273             "inconsistent address");
 274  274    }
 275  275  
 276  276  #endif // ASSERT
 277  277  
 278  278    // accessors
             // uses() is true when reg is either the base or the index register.
 279  279    bool        uses(Register reg) const { return _base == reg || _index == reg; }
 280  280    Register    base()             const { return _base;  }
 281  281    Register    index()            const { return _index; }
 282  282    ScaleFactor scale()            const { return _scale; }
 283  283    int         disp()             const { return _disp;  }
 284  284  
 285  285    // Convert the raw encoding form into the form expected by the constructor for
 286  286    // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 287  287    // that to noreg for the Address constructor.
 288  288    static Address make_raw(int base, int index, int scale, int disp, bool disp_is_oop);
 289  289  
 290  290    static Address make_array(ArrayAddress);
 291  291  
 292  292   private:
             // True when a base register is set and its encoding is >= 8.
             // NOTE(review): presumably these are the registers that need a REX
             // prefix bit, as the name says -- the encoder itself is not in this file.
 293  293    bool base_needs_rex() const {
 294  294      return _base != noreg && _base->encoding() >= 8;
 295  295    }
 296  296  
             // Same test as base_needs_rex(), applied to the index register.
 297  297    bool index_needs_rex() const {
 298  298      return _index != noreg &&_index->encoding() >= 8;
 299  299    }
 300  300  
             // Relocation type recorded in _rspec.
 301  301    relocInfo::relocType reloc() const { return _rspec.type(); }
 302  302  
 303  303    friend class Assembler;
 304  304    friend class MacroAssembler;
 305  305    friend class LIR_Assembler; // base/index/scale/disp
 306  306  };
 307  307  
 308  308  //
 309  309  // AddressLiteral has been split out from Address because operands of this type
 310  310  // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 311  311  // the few instructions that need to deal with address literals are unique and the
 312  312  // MacroAssembler does not have to implement every instruction in the Assembler
 313  313  // in order to search for address literals that may need special handling depending
 314  314  // on the instruction and the platform. A small step on the way to merging i486/amd64
 315  315  // directories.
 316  316  //
 317  317  class AddressLiteral VALUE_OBJ_CLASS_SPEC {
           // Value type pairing a target address with a RelocationHolder and an
           // lval/rval flag. Only the constructors and addr() are public; the
           // accessors are private and reached through the friend declarations.
 318  318    friend class ArrayAddress;
 319  319    RelocationHolder _rspec;
 320  320    // Typically when we use AddressLiterals we want to use their rval.
 321  321    // However, in some situations we want the lval (effective address) of the item.
 322  322    // We provide a special factory (addr()) for making those lvals.
 323  323    bool _is_lval;
 324  324  
 325  325    // If the target is far we'll need to load the ea of this to
 326  326    // a register to reach it. Otherwise if near we can do rip
 327  327    // relative addressing.
 328  328  
 329  329    address          _target;
 330  330  
 331  331   protected:
 332  332    // creation
             // Default form: NULL target, rval. Protected, so it is only available to
             // subclasses and to the friends declared in this class.
 333  333    AddressLiteral()
 334  334      : _is_lval(false),
 335  335        _target(NULL)
 336  336    {}
 337  337  
 338  338    public:
 339  339  
 340  340  
             // Builds an rval literal for target from a relocation type; defined
             // outside this header section.
 341  341    AddressLiteral(address target, relocInfo::relocType rtype);
 342  342  
             // Builds an rval literal for target using the given relocation holder.
 343  343    AddressLiteral(address target, RelocationHolder const& rspec)
 344  344      : _rspec(rspec),
 345  345        _is_lval(false),
 346  346        _target(target)
 347  347    {}
 348  348  
             // Factory for the lval form: returns a copy of this literal with
             // _is_lval set to true; *this is left unchanged.
 349  349    AddressLiteral addr() {
 350  350      AddressLiteral ret = *this;
 351  351      ret._is_lval = true;
 352  352      return ret;
 353  353    }
 354  354  
 355  355  
 356  356   private:
 357  357  
 358  358    address target() { return _target; }
 359  359    bool is_lval() { return _is_lval; }
 360  360  
 361  361    relocInfo::relocType reloc() const { return _rspec.type(); }
 362  362    const RelocationHolder& rspec() const { return _rspec; }
 363  363  
 364  364    friend class Assembler;
 365  365    friend class MacroAssembler;
 366  366    friend class Address;
 367  367    friend class LIR_Assembler;
 368  368  };
 369  369  
 370  370  // Convenience classes
 371  371  class RuntimeAddress: public AddressLiteral {
           // AddressLiteral for target with relocation type relocInfo::runtime_call_type.
 372  372  
 373  373    public:
 374  374  
 375  375    RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
 376  376  
 377  377  };
 378  378  
 379  379  class OopAddress: public AddressLiteral {
           // AddressLiteral for target with relocation type relocInfo::oop_type.
 380  380  
 381  381    public:
 382  382  
 383  383    OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){}
 384  384  
 385  385  };
 386  386  
 387  387  class ExternalAddress: public AddressLiteral {
           // AddressLiteral for target with relocation type relocInfo::external_word_type.
 388  388  
 389  389    public:
 390  390  
 391  391    ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){}
 392  392  
 393  393  };
 394  394  
 395  395  class InternalAddress: public AddressLiteral {
           // AddressLiteral for target with relocation type relocInfo::internal_word_type.
 396  396  
 397  397    public:
 398  398  
 399  399    InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
 400  400  
 401  401  };
 402  402  
 403  403  // x86 can do array addressing as a single operation since disp can be an absolute
 404  404  // address; amd64 can't. We create a class that expresses the concept but does extra
 405  405  // magic on amd64 to get the final result.
 406  406  
 407  407  class ArrayAddress VALUE_OBJ_CLASS_SPEC {
           // Pairs an AddressLiteral (the array base) with an Address (the index part).
           // Both accessors return copies of the stored members.
 408  408    private:
 409  409  
 410  410    AddressLiteral _base;
 411  411    Address        _index;
 412  412  
 413  413    public:
 414  414  
             // Default construction uses the default constructors of both members.
 415  415    ArrayAddress() {};
 416  416    ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
 417  417    AddressLiteral base() { return _base; }
 418  418    Address index() { return _index; }
 419  419  
 420  420  };
 421  421  
 422  422  const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);  // 27 for NOT_LP64, 512 / wordSize for LP64_ONLY; NOTE(review): assumes these macros keep their argument only in 32-bit / 64-bit builds respectively
 423  423  
 424  424  // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
 425  425  // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
 426  426  // is what you get. The Assembler is generating code into a CodeBuffer.
 427  427  
 428  428  class Assembler : public AbstractAssembler  {
 429  429    friend class AbstractAssembler; // for the non-virtual hack
 430  430    friend class LIR_Assembler; // as_Address()
 431  431    friend class StubGenerator;
 432  432  
 433  433   public:
 434  434    enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
 435  435      zero          = 0x4,
 436  436      notZero       = 0x5,
 437  437      equal         = 0x4,
 438  438      notEqual      = 0x5,
 439  439      less          = 0xc,
 440  440      lessEqual     = 0xe,
 441  441      greater       = 0xf,
 442  442      greaterEqual  = 0xd,
 443  443      below         = 0x2,
 444  444      belowEqual    = 0x6,
 445  445      above         = 0x7,
 446  446      aboveEqual    = 0x3,
 447  447      overflow      = 0x0,
 448  448      noOverflow    = 0x1,
 449  449      carrySet      = 0x2,
 450  450      carryClear    = 0x3,
 451  451      negative      = 0x8,
 452  452      positive      = 0x9,
 453  453      parity        = 0xa,
 454  454      noParity      = 0xb
 455  455    };
 456  456  
 457  457    enum Prefix {
 458  458      // segment overrides
 459  459      CS_segment = 0x2e,
 460  460      SS_segment = 0x36,
 461  461      DS_segment = 0x3e,
 462  462      ES_segment = 0x26,
 463  463      FS_segment = 0x64,
 464  464      GS_segment = 0x65,
 465  465  
 466  466      REX        = 0x40,
 467  467  
 468  468      REX_B      = 0x41,
 469  469      REX_X      = 0x42,
 470  470      REX_XB     = 0x43,
 471  471      REX_R      = 0x44,
 472  472      REX_RB     = 0x45,
 473  473      REX_RX     = 0x46,
 474  474      REX_RXB    = 0x47,
 475  475  
 476  476      REX_W      = 0x48,
 477  477  
 478  478      REX_WB     = 0x49,
 479  479      REX_WX     = 0x4A,
 480  480      REX_WXB    = 0x4B,
 481  481      REX_WR     = 0x4C,
 482  482      REX_WRB    = 0x4D,
 483  483      REX_WRX    = 0x4E,
 484  484      REX_WRXB   = 0x4F
 485  485    };
 486  486  
 487  487    enum WhichOperand {
 488  488      // input to locate_operand, and format code for relocations
 489  489      imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
 490  490      disp32_operand = 1,          // embedded 32-bit displacement or address
 491  491      call32_operand = 2,          // embedded 32-bit self-relative displacement
 492  492  #ifndef _LP64
 493  493      _WhichOperand_limit = 3
 494  494  #else
 495  495       narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
 496  496      _WhichOperand_limit = 4
 497  497  #endif
 498  498    };
 499  499  
 500  500  
 501  501  
 502  502    // NOTE: The general philosophy of the declarations here is that 64bit versions
 503  503    // of instructions are freely declared without the need for wrapping them in an ifdef.
 504  504    // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
 505  505    // In the .cpp file the implementations are wrapped so that they are dropped out
 506  506    // of the resulting jvm. This is done mostly to keep the footprint of KERNEL
 507  507    // to the size it was prior to merging up the 32bit and 64bit assemblers.
 508  508    //
 509  509    // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
 510  510    // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
 511  511  
 512  512  private:
 513  513  
 514  514  
 515  515    // 64bit prefixes
 516  516    int prefix_and_encode(int reg_enc, bool byteinst = false);
 517  517    int prefixq_and_encode(int reg_enc);
 518  518  
 519  519    int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
 520  520    int prefixq_and_encode(int dst_enc, int src_enc);
 521  521  
 522  522    void prefix(Register reg);
 523  523    void prefix(Address adr);
 524  524    void prefixq(Address adr);
 525  525  
 526  526    void prefix(Address adr, Register reg,  bool byteinst = false);
 527  527    void prefixq(Address adr, Register reg);
 528  528  
 529  529    void prefix(Address adr, XMMRegister reg);
 530  530  
 531  531    void prefetch_prefix(Address src);
 532  532  
 533  533    // Helper functions for groups of instructions
 534  534    void emit_arith_b(int op1, int op2, Register dst, int imm8);
 535  535  
 536  536    void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 537  537    // only 32bit??
 538  538    void emit_arith(int op1, int op2, Register dst, jobject obj);
 539  539    void emit_arith(int op1, int op2, Register dst, Register src);
 540  540  
 541  541    void emit_operand(Register reg,
 542  542                      Register base, Register index, Address::ScaleFactor scale,
 543  543                      int disp,
 544  544                      RelocationHolder const& rspec,
 545  545                      int rip_relative_correction = 0);
 546  546  
 547  547    void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 548  548  
 549  549    // operands that only take the original 32bit registers
 550  550    void emit_operand32(Register reg, Address adr);
 551  551  
 552  552    void emit_operand(XMMRegister reg,
 553  553                      Register base, Register index, Address::ScaleFactor scale,
 554  554                      int disp,
 555  555                      RelocationHolder const& rspec);
 556  556  
 557  557    void emit_operand(XMMRegister reg, Address adr);
 558  558  
 559  559    void emit_operand(MMXRegister reg, Address adr);
 560  560  
 561  561    // workaround gcc (3.2.1-7) bug
 562  562    void emit_operand(Address adr, MMXRegister reg);
 563  563  
 564  564  
 565  565    // Immediate-to-memory forms
 566  566    void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
 567  567  
 568  568    void emit_farith(int b1, int b2, int i);
 569  569  
 570  570  
 571  571   protected:
 572  572    #ifdef ASSERT
 573  573    void check_relocation(RelocationHolder const& rspec, int format);
 574  574    #endif
 575  575  
 576  576    inline void emit_long64(jlong x);
 577  577  
 578  578    void emit_data(jint data, relocInfo::relocType    rtype, int format);
 579  579    void emit_data(jint data, RelocationHolder const& rspec, int format);
 580  580    void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
 581  581    void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
 582  582  
 583  583  
 584  584    bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
 585  585  
 586  586    // These are all easily abused and hence protected
 587  587  
 588  588    // 32BIT ONLY SECTION
 589  589  #ifndef _LP64
 590  590    // Make these disappear in 64bit mode since they would never be correct
 591  591    void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
 592  592    void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 593  593  
 594  594    void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
 595  595    void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
 596  596  
 597  597    void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
 598  598  #else
 599  599    // 64BIT ONLY SECTION
 600  600    void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
 601  601  
 602  602    void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
 603  603    void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
 604  604  
 605  605    void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
 606  606    void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
 607  607  #endif // _LP64
 608  608  
 609  609    // These are unique in that we are ensured by the caller that the 32bit
 610  610    // relative in these instructions will always be able to reach the potentially
 611  611    // 64bit address described by entry. Since they can take a 64bit address they
 612  612    // don't have the 32 suffix like the other instructions in this class.
 613  613  
 614  614    void call_literal(address entry, RelocationHolder const& rspec);
 615  615    void jmp_literal(address entry, RelocationHolder const& rspec);
 616  616  
 617  617    // Avoid using directly section
 618  618    // Instructions in this section are actually usable by anyone without danger
 619  619    // of failure but have performance issues that are addressed by enhanced
 620  620    // instructions which will do the proper thing based on the particular cpu.
 621  621    // We protect them because we don't trust you...
 622  622  
 623  623    // Don't use next inc() and dec() methods directly. INC & DEC instructions
 624  624    // could cause a partial flag stall since they don't set CF flag.
 625  625    // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
 626  626    // which call inc() & dec() or add() & sub() in accordance with
 627  627    // the product flag UseIncDec value.
 628  628  
 629  629    void decl(Register dst);
 630  630    void decl(Address dst);
 631  631    void decq(Register dst);
 632  632    void decq(Address dst);
 633  633  
 634  634    void incl(Register dst);
 635  635    void incl(Address dst);
 636  636    void incq(Register dst);
 637  637    void incq(Address dst);
 638  638  
 639  639    // New cpus require use of movsd and movss to avoid partial register stall
 640  640    // when loading from memory. But for old Opteron use movlpd instead of movsd.
 641  641    // The selection is done in MacroAssembler::movdbl() and movflt().
 642  642  
 643  643    // Move Scalar Single-Precision Floating-Point Values
 644  644    void movss(XMMRegister dst, Address src);
 645  645    void movss(XMMRegister dst, XMMRegister src);
 646  646    void movss(Address dst, XMMRegister src);
 647  647  
 648  648    // Move Scalar Double-Precision Floating-Point Values
 649  649    void movsd(XMMRegister dst, Address src);
 650  650    void movsd(XMMRegister dst, XMMRegister src);
 651  651    void movsd(Address dst, XMMRegister src);
 652  652    void movlpd(XMMRegister dst, Address src);
 653  653  
 654  654    // New cpus require use of movaps and movapd to avoid partial register stall
 655  655    // when moving between registers.
 656  656    void movaps(XMMRegister dst, XMMRegister src);
 657  657    void movapd(XMMRegister dst, XMMRegister src);
 658  658  
 659  659    // End avoid using directly
 660  660  
 661  661  
 662  662    // Instruction prefixes
 663  663    void prefix(Prefix p);
 664  664  
 665  665    public:
 666  666  
 667  667    // Creation
 668  668    Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
 669  669  
 670  670    // Decoding
 671  671    static address locate_operand(address inst, WhichOperand which);
 672  672    static address locate_next_instruction(address inst);
 673  673  
 674  674    // Utilities
 675  675  
 676  676  #ifdef _LP64
 677  677   static bool is_simm(int64_t x, int nbits) { return -(CONST64(1) << (nbits-1)) <= x &&
 678  678                                                      x < (CONST64(1) << (nbits-1)); }
 679  679   static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; }
 680  680  #else
 681  681   static bool is_simm(int32_t x, int nbits) { return -(1 << (nbits-1)) <= x &&
 682  682                                                      x < (1 << (nbits-1)); }
 683  683   static bool is_simm32(int32_t x) { return true; }
 684  684  #endif // _LP64
 685  685  
 686  686    // Generic instructions
 687  687    // Does 32bit or 64bit as needed for the platform. In some sense these
 688  688    // belong in macro assembler but there is no need for both varieties to exist
 689  689  
 690  690    void lea(Register dst, Address src);
 691  691  
 692  692    void mov(Register dst, Register src);
 693  693  
 694  694    void pusha();
 695  695    void popa();
 696  696  
 697  697    void pushf();
 698  698    void popf();
 699  699  
 700  700    void push(int32_t imm32);
 701  701  
 702  702    void push(Register src);
 703  703  
 704  704    void pop(Register dst);
 705  705  
 706  706    // These are dummies to prevent surprise implicit conversions to Register
 707  707    void push(void* v);
 708  708    void pop(void* v);
 709  709  
 710  710    // These do register sized moves/scans
 711  711    void rep_mov();
 712  712    void rep_set();
 713  713    void repne_scan();
 714  714  #ifdef _LP64
 715  715    void repne_scanl();
 716  716  #endif
 717  717  
 718  718    // Vanilla instructions in lexical order
 719  719  
 720  720    void adcl(Address dst, int32_t imm32);
 721  721    void adcl(Address dst, Register src);
 722  722    void adcl(Register dst, int32_t imm32);
 723  723    void adcl(Register dst, Address src);
 724  724    void adcl(Register dst, Register src);
 725  725  
 726  726    void adcq(Register dst, int32_t imm32);
 727  727    void adcq(Register dst, Address src);
 728  728    void adcq(Register dst, Register src);
 729  729  
 730  730    void addl(Address dst, int32_t imm32);
 731  731    void addl(Address dst, Register src);
 732  732    void addl(Register dst, int32_t imm32);
 733  733    void addl(Register dst, Address src);
 734  734    void addl(Register dst, Register src);
 735  735  
 736  736    void addq(Address dst, int32_t imm32);
 737  737    void addq(Address dst, Register src);
 738  738    void addq(Register dst, int32_t imm32);
 739  739    void addq(Register dst, Address src);
 740  740    void addq(Register dst, Register src);
 741  741  
 742  742    void addr_nop_4();
 743  743    void addr_nop_5();
 744  744    void addr_nop_7();
 745  745    void addr_nop_8();
 746  746  
 747  747    // Add Scalar Double-Precision Floating-Point Values
 748  748    void addsd(XMMRegister dst, Address src);
 749  749    void addsd(XMMRegister dst, XMMRegister src);
 750  750  
 751  751    // Add Scalar Single-Precision Floating-Point Values
 752  752    void addss(XMMRegister dst, Address src);
 753  753    void addss(XMMRegister dst, XMMRegister src);
 754  754  
 755  755    void andl(Register dst, int32_t imm32);
 756  756    void andl(Register dst, Address src);
 757  757    void andl(Register dst, Register src);
 758  758  
 759  759    void andq(Register dst, int32_t imm32);
 760  760    void andq(Register dst, Address src);
 761  761    void andq(Register dst, Register src);
 762  762  
 763  763    // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
 764  764    void andpd(XMMRegister dst, Address src);
 765  765    void andpd(XMMRegister dst, XMMRegister src);
 766  766  
 767  767    void bsfl(Register dst, Register src);
 768  768    void bsrl(Register dst, Register src);
 769  769  
 770  770  #ifdef _LP64
 771  771    void bsfq(Register dst, Register src);
 772  772    void bsrq(Register dst, Register src);
 773  773  #endif
 774  774  
 775  775    void bswapl(Register reg);
 776  776  
 777  777    void bswapq(Register reg);
 778  778  
 779  779    void call(Label& L, relocInfo::relocType rtype);
 780  780    void call(Register reg);  // push pc; pc <- reg
 781  781    void call(Address adr);   // push pc; pc <- adr
 782  782  
 783  783    void cdql();
 784  784  
 785  785    void cdqq();
 786  786  
 787  787    void cld() { emit_byte(0xfc); }  // CLD (clear direction flag): emitted inline as the single opcode byte 0xFC
 788  788  
 789  789    void clflush(Address adr);
 790  790  
 791  791    void cmovl(Condition cc, Register dst, Register src);
 792  792    void cmovl(Condition cc, Register dst, Address src);
 793  793  
 794  794    void cmovq(Condition cc, Register dst, Register src);
 795  795    void cmovq(Condition cc, Register dst, Address src);
 796  796  
 797  797  
 798  798    void cmpb(Address dst, int imm8);
 799  799  
 800  800    void cmpl(Address dst, int32_t imm32);
 801  801  
 802  802    void cmpl(Register dst, int32_t imm32);
 803  803    void cmpl(Register dst, Register src);
 804  804    void cmpl(Register dst, Address src);
 805  805  
 806  806    void cmpq(Address dst, int32_t imm32);
 807  807    void cmpq(Address dst, Register src);
 808  808  
 809  809    void cmpq(Register dst, int32_t imm32);
 810  810    void cmpq(Register dst, Register src);
 811  811    void cmpq(Register dst, Address src);
 812  812  
 813  813    // these are dummies used to catch attempting to convert NULL to Register
 814  814    void cmpl(Register dst, void* junk); // dummy
 815  815    void cmpq(Register dst, void* junk); // dummy
 816  816  
 817  817    void cmpw(Address dst, int imm16);
 818  818  
 819  819    void cmpxchg8 (Address adr);
 820  820  
 821  821    void cmpxchgl(Register reg, Address adr);
 822  822  
 823  823    void cmpxchgq(Register reg, Address adr);
 824  824  
 825  825    // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
 826  826    void comisd(XMMRegister dst, Address src);
 827  827  
 828  828    // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
 829  829    void comiss(XMMRegister dst, Address src);
 830  830  
 831  831    // Identify processor type and features
 832  832    void cpuid() {      // emitted inline as a fixed two-byte sequence, no operands
 833  833      emit_byte(0x0F);  // first opcode byte
 834  834      emit_byte(0xA2);  // second opcode byte (0F A2 = CPUID)
 835  835    }
 836  836  
 837  837    // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
 838  838    void cvtsd2ss(XMMRegister dst, XMMRegister src);
 839  839  
 840  840    // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
 841  841    void cvtsi2sdl(XMMRegister dst, Register src);
 842  842    void cvtsi2sdq(XMMRegister dst, Register src);
 843  843  
 844  844    // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
 845  845    void cvtsi2ssl(XMMRegister dst, Register src);
 846  846    void cvtsi2ssq(XMMRegister dst, Register src);
 847  847  
 848  848    // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
 849  849    void cvtdq2pd(XMMRegister dst, XMMRegister src);
 850  850  
 851  851    // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
 852  852    void cvtdq2ps(XMMRegister dst, XMMRegister src);
 853  853  
 854  854    // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
 855  855    void cvtss2sd(XMMRegister dst, XMMRegister src);
 856  856  
 857  857    // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
 858  858    void cvttsd2sil(Register dst, Address src);
 859  859    void cvttsd2sil(Register dst, XMMRegister src);
 860  860    void cvttsd2siq(Register dst, XMMRegister src);
 861  861  
 862  862    // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
 863  863    void cvttss2sil(Register dst, XMMRegister src);
 864  864    void cvttss2siq(Register dst, XMMRegister src);
 865  865  
 866  866    // Divide Scalar Double-Precision Floating-Point Values
 867  867    void divsd(XMMRegister dst, Address src);
 868  868    void divsd(XMMRegister dst, XMMRegister src);
 869  869  
 870  870    // Divide Scalar Single-Precision Floating-Point Values
 871  871    void divss(XMMRegister dst, Address src);
 872  872    void divss(XMMRegister dst, XMMRegister src);
 873  873  
 874  874    void emms();
 875  875  
 876  876    void fabs();
 877  877  
 878  878    void fadd(int i);
 879  879  
 880  880    void fadd_d(Address src);
 881  881    void fadd_s(Address src);
 882  882  
 883  883    // "Alternate" versions of x87 instructions place result down in FPU
 884  884    // stack instead of on TOS
 885  885  
 886  886    void fadda(int i); // "alternate" fadd
 887  887    void faddp(int i = 1);
 888  888  
 889  889    void fchs();
 890  890  
 891  891    void fcom(int i);
 892  892  
 893  893    void fcomp(int i = 1);
 894  894    void fcomp_d(Address src);
 895  895    void fcomp_s(Address src);
 896  896  
 897  897    void fcompp();
 898  898  
 899  899    void fcos();
 900  900  
 901  901    void fdecstp();
 902  902  
 903  903    void fdiv(int i);
 904  904    void fdiv_d(Address src);
 905  905    void fdivr_s(Address src);
 906  906    void fdiva(int i);  // "alternate" fdiv
 907  907    void fdivp(int i = 1);
 908  908  
 909  909    void fdivr(int i);
 910  910    void fdivr_d(Address src);
 911  911    void fdiv_s(Address src);
 912  912  
 913  913    void fdivra(int i); // "alternate" reversed fdiv
 914  914  
 915  915    void fdivrp(int i = 1);
 916  916  
 917  917    void ffree(int i = 0);
 918  918  
 919  919    void fild_d(Address adr);
 920  920    void fild_s(Address adr);
 921  921  
 922  922    void fincstp();
 923  923  
 924  924    void finit();
 925  925  
 926  926    void fist_s (Address adr);
 927  927    void fistp_d(Address adr);
 928  928    void fistp_s(Address adr);
 929  929  
 930  930    void fld1();
 931  931  
 932  932    void fld_d(Address adr);
 933  933    void fld_s(Address adr);
 934  934    void fld_s(int index);
 935  935    void fld_x(Address adr);  // extended-precision (80-bit) format
 936  936  
 937  937    void fldcw(Address src);
 938  938  
 939  939    void fldenv(Address src);
 940  940  
 941  941    void fldlg2();
 942  942  
 943  943    void fldln2();
 944  944  
 945  945    void fldz();
 946  946  
 947  947    void flog();
 948  948    void flog10();
 949  949  
 950  950    void fmul(int i);
 951  951  
 952  952    void fmul_d(Address src);
 953  953    void fmul_s(Address src);
 954  954  
 955  955    void fmula(int i);  // "alternate" fmul
 956  956  
 957  957    void fmulp(int i = 1);
 958  958  
 959  959    void fnsave(Address dst);
 960  960  
 961  961    void fnstcw(Address src);
 962  962  
 963  963    void fnstsw_ax();
 964  964  
 965  965    void fprem();
 966  966    void fprem1();
 967  967  
 968  968    void frstor(Address src);
 969  969  
 970  970    void fsin();
 971  971  
 972  972    void fsqrt();
 973  973  
 974  974    void fst_d(Address adr);
 975  975    void fst_s(Address adr);
 976  976  
 977  977    void fstp_d(Address adr);
 978  978    void fstp_d(int index);
 979  979    void fstp_s(Address adr);
 980  980    void fstp_x(Address adr); // extended-precision (80-bit) format
 981  981  
 982  982    void fsub(int i);
 983  983    void fsub_d(Address src);
 984  984    void fsub_s(Address src);
 985  985  
 986  986    void fsuba(int i);  // "alternate" fsub
 987  987  
 988  988    void fsubp(int i = 1);
 989  989  
 990  990    void fsubr(int i);
 991  991    void fsubr_d(Address src);
 992  992    void fsubr_s(Address src);
 993  993  
 994  994    void fsubra(int i); // "alternate" reversed fsub
 995  995  
 996  996    void fsubrp(int i = 1);
 997  997  
 998  998    void ftan();
 999  999  
1000 1000    void ftst();
1001 1001  
1002 1002    void fucomi(int i = 1);
1003 1003    void fucomip(int i = 1);
1004 1004  
1005 1005    void fwait();
1006 1006  
1007 1007    void fxch(int i = 1);
1008 1008  
1009 1009    void fxrstor(Address src);
1010 1010  
1011 1011    void fxsave(Address dst);
1012 1012  
1013 1013    void fyl2x();
1014 1014  
1015 1015    void hlt();
1016 1016  
1017 1017    void idivl(Register src);
1018 1018    void divl(Register src); // Unsigned division
1019 1019  
1020 1020    void idivq(Register src);
1021 1021  
1022 1022    void imull(Register dst, Register src);
1023 1023    void imull(Register dst, Register src, int value);
1024 1024  
1025 1025    void imulq(Register dst, Register src);
1026 1026    void imulq(Register dst, Register src, int value);
1027 1027  
1028 1028  
1029 1029    // jcc is the generic conditional branch generator to run-
1030 1030    // time routines, jcc is used for branches to labels. jcc
1031 1031    // takes a branch opcode (cc) and a label (L) and generates
1032 1032    // either a backward branch or a forward branch and links it
1033 1033    // to the label fixup chain. Usage:
1034 1034    //
1035 1035    // Label L;      // unbound label
1036 1036    // jcc(cc, L);   // forward branch to unbound label
1037 1037    // bind(L);      // bind label to the current pc
1038 1038    // jcc(cc, L);   // backward branch to bound label
1039 1039    // bind(L);      // illegal: a label may be bound only once
1040 1040    //
1041 1041    // Note: The same Label can be used for forward and backward branches
1042 1042    // but it may be bound only once.
1043 1043  
1044 1044    void jcc(Condition cc, Label& L,
1045 1045             relocInfo::relocType rtype = relocInfo::none);
1046 1046  
1047 1047    // Conditional jump to L using an 8-bit offset.
1048 1048    // WARNING: be very careful using this for forward jumps.  If the label is
1049 1049    // not bound within an 8-bit offset of this instruction, a run-time error
1050 1050    // will occur.
1051 1051    void jccb(Condition cc, Label& L);
1052 1052  
1053 1053    void jmp(Address entry);    // pc <- entry
1054 1054  
1055 1055    // Label operations & relative jumps (PPUM Appendix D)
1056 1056    void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none);   // unconditional jump to L
1057 1057  
1058 1058    void jmp(Register entry); // pc <- entry
1059 1059  
1060 1060    // Unconditional 8-bit offset jump to L.
1061 1061    // WARNING: be very careful using this for forward jumps.  If the label is
1062 1062    // not bound within an 8-bit offset of this instruction, a run-time error
1063 1063    // will occur.
1064 1064    void jmpb(Label& L);
1065 1065  
1066 1066    void ldmxcsr( Address src );
1067 1067  
1068 1068    void leal(Register dst, Address src);
1069 1069  
1070 1070    void leaq(Register dst, Address src);
1071 1071  
1072 1072    void lfence() {     // LFENCE, emitted inline as the fixed three-byte sequence 0F AE E8
1073 1073      emit_byte(0x0F);
1074 1074      emit_byte(0xAE);
1075 1075      emit_byte(0xE8);
1076 1076    }
1077 1077  
1078 1078    void lock();
1079 1079  
1080 1080    void lzcntl(Register dst, Register src);
1081 1081  
1082 1082  #ifdef _LP64
1083 1083    void lzcntq(Register dst, Register src);
1084 1084  #endif
1085 1085  
1086 1086    enum Membar_mask_bits {  // one distinct bit per ordering constraint; membar() tests them with '&'
1087 1087      StoreStore = 1 << 3,
1088 1088      LoadStore  = 1 << 2,
1089 1089      StoreLoad  = 1 << 1,   // the only bit membar() below emits code for
1090 1090      LoadLoad   = 1 << 0
1091 1091    };
1092 1092  
1093 1093    // Serializes memory and blows flags
1094 1094    void membar(Membar_mask_bits order_constraint) {  // order_constraint: mask of Membar_mask_bits values
1095 1095      if (os::is_MP()) {  // nothing is emitted when os::is_MP() is false
1096 1096        // We only have to handle StoreLoad
1097 1097        if (order_constraint & StoreLoad) {  // every other bit in the mask produces no code
1098 1098          // All usable chips support "locked" instructions which suffice
1099 1099          // as barriers, and are much faster than the alternative of
1100 1100          // using cpuid instruction. We use here a locked add [esp],0.
1101 1101          // This is conveniently otherwise a no-op except for blowing
1102 1102          // flags.
1103 1103          // Any change to this code may need to revisit other places in
1104 1104          // the code where this idiom is used, in particular the
1105 1105          // orderAccess code.
1106 1106          lock();  // emitted first so that it prefixes the addl that follows
1107 1107          addl(Address(rsp, 0), 0);// Assert the lock# signal here
1108 1108        }
1109 1109      }
1110 1110    }
1111 1111  
1112 1112    void mfence();
1113 1113  
1114 1114    // Moves
1115 1115  
1116 1116    void mov64(Register dst, int64_t imm64);
1117 1117  
1118 1118    void movb(Address dst, Register src);
1119 1119    void movb(Address dst, int imm8);
1120 1120    void movb(Register dst, Address src);
1121 1121  
1122 1122    void movdl(XMMRegister dst, Register src);
1123 1123    void movdl(Register dst, XMMRegister src);
1124 1124  
1125 1125    // Move Double Quadword
1126 1126    void movdq(XMMRegister dst, Register src);
1127 1127    void movdq(Register dst, XMMRegister src);
1128 1128  
1129 1129    // Move Aligned Double Quadword
1130 1130    void movdqa(Address     dst, XMMRegister src);
1131 1131    void movdqa(XMMRegister dst, Address src);
1132 1132    void movdqa(XMMRegister dst, XMMRegister src);
1133 1133  
1134 1134    // Move Unaligned Double Quadword
1135 1135    void movdqu(Address     dst, XMMRegister src);
1136 1136    void movdqu(XMMRegister dst, Address src);
1137 1137    void movdqu(XMMRegister dst, XMMRegister src);
1138 1138  
1139 1139    void movl(Register dst, int32_t imm32);
1140 1140    void movl(Address dst, int32_t imm32);
1141 1141    void movl(Register dst, Register src);
1142 1142    void movl(Register dst, Address src);
1143 1143    void movl(Address dst, Register src);
1144 1144  
1145 1145    // These dummies prevent using movl from converting a zero (like NULL) into Register
1146 1146    // by giving the compiler two choices it can't resolve
1147 1147  
1148 1148    void movl(Address  dst, void* junk);
1149 1149    void movl(Register dst, void* junk);
1150 1150  
1151 1151  #ifdef _LP64
1152 1152    void movq(Register dst, Register src);
1153 1153    void movq(Register dst, Address src);
1154 1154    void movq(Address  dst, Register src);
1155 1155  #endif
1156 1156  
1157 1157    void movq(Address     dst, MMXRegister src );
1158 1158    void movq(MMXRegister dst, Address src );
1159 1159  
1160 1160  #ifdef _LP64
1161 1161    // These dummies prevent using movq from converting a zero (like NULL) into Register
1162 1162    // by giving the compiler two choices it can't resolve
1163 1163  
1164 1164    void movq(Address  dst, void* dummy);
1165 1165    void movq(Register dst, void* dummy);
1166 1166  #endif
1167 1167  
1168 1168    // Move Quadword
1169 1169    void movq(Address     dst, XMMRegister src);
1170 1170    void movq(XMMRegister dst, Address src);
1171 1171  
1172 1172    void movsbl(Register dst, Address src);
1173 1173    void movsbl(Register dst, Register src);
1174 1174  
1175 1175  #ifdef _LP64
1176 1176    void movsbq(Register dst, Address src);
1177 1177    void movsbq(Register dst, Register src);
1178 1178  
1179 1179    // Move signed 32bit immediate to 64bit extending sign
1180 1180    void movslq(Address  dst, int32_t imm64);
1181 1181    void movslq(Register dst, int32_t imm64);
1182 1182  
1183 1183    void movslq(Register dst, Address src);
1184 1184    void movslq(Register dst, Register src);
1185 1185    void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1186 1186  #endif
1187 1187  
1188 1188    void movswl(Register dst, Address src);
1189 1189    void movswl(Register dst, Register src);
1190 1190  
1191 1191  #ifdef _LP64
1192 1192    void movswq(Register dst, Address src);
1193 1193    void movswq(Register dst, Register src);
1194 1194  #endif
1195 1195  
1196 1196    void movw(Address dst, int imm16);
1197 1197    void movw(Register dst, Address src);
1198 1198    void movw(Address dst, Register src);
1199 1199  
1200 1200    void movzbl(Register dst, Address src);
1201 1201    void movzbl(Register dst, Register src);
1202 1202  
1203 1203  #ifdef _LP64
1204 1204    void movzbq(Register dst, Address src);
1205 1205    void movzbq(Register dst, Register src);
1206 1206  #endif
1207 1207  
1208 1208    void movzwl(Register dst, Address src);
1209 1209    void movzwl(Register dst, Register src);
1210 1210  
1211 1211  #ifdef _LP64
1212 1212    void movzwq(Register dst, Address src);
1213 1213    void movzwq(Register dst, Register src);
1214 1214  #endif
1215 1215  
1216 1216    void mull(Address src);
1217 1217    void mull(Register src);
1218 1218  
1219 1219    // Multiply Scalar Double-Precision Floating-Point Values
1220 1220    void mulsd(XMMRegister dst, Address src);
1221 1221    void mulsd(XMMRegister dst, XMMRegister src);
1222 1222  
1223 1223    // Multiply Scalar Single-Precision Floating-Point Values
1224 1224    void mulss(XMMRegister dst, Address src);
1225 1225    void mulss(XMMRegister dst, XMMRegister src);
1226 1226  
1227 1227    void negl(Register dst);
1228 1228  
1229 1229  #ifdef _LP64
1230 1230    void negq(Register dst);
1231 1231  #endif
1232 1232  
1233 1233    void nop(int i = 1);
1234 1234  
1235 1235    void notl(Register dst);
1236 1236  
1237 1237  #ifdef _LP64
1238 1238    void notq(Register dst);
1239 1239  #endif
1240 1240  
1241 1241    void orl(Address dst, int32_t imm32);
1242 1242    void orl(Register dst, int32_t imm32);
1243 1243    void orl(Register dst, Address src);
1244 1244    void orl(Register dst, Register src);
1245 1245  
1246 1246    void orq(Address dst, int32_t imm32);
1247 1247    void orq(Register dst, int32_t imm32);
1248 1248    void orq(Register dst, Address src);
1249 1249    void orq(Register dst, Register src);
1250 1250  
1251 1251    // SSE4.2 string instructions
1252 1252    void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1253 1253    void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1254 1254  
1255 1255  #ifndef _LP64 // no 32bit push/pop on amd64
1256 1256    void popl(Address dst);
1257 1257  #endif
1258 1258  
1259 1259  #ifdef _LP64
1260 1260    void popq(Address dst);
1261 1261  #endif
1262 1262  
1263 1263    void popcntl(Register dst, Address src);
1264 1264    void popcntl(Register dst, Register src);
1265 1265  
1266 1266  #ifdef _LP64
1267 1267    void popcntq(Register dst, Address src);
1268 1268    void popcntq(Register dst, Register src);
1269 1269  #endif
1270 1270  
1271 1271    // Prefetches (SSE, SSE2, 3DNOW only)
1272 1272  
1273 1273    void prefetchnta(Address src);
1274 1274    void prefetchr(Address src);
1275 1275    void prefetcht0(Address src);
1276 1276    void prefetcht1(Address src);
1277 1277    void prefetcht2(Address src);
1278 1278    void prefetchw(Address src);
1279 1279  
1280 1280    // POR - Bitwise logical OR
1281 1281    void por(XMMRegister dst, XMMRegister src);
1282 1282  
1283 1283    // Shuffle Packed Doublewords
1284 1284    void pshufd(XMMRegister dst, XMMRegister src, int mode);
1285 1285    void pshufd(XMMRegister dst, Address src,     int mode);
1286 1286  
1287 1287    // Shuffle Packed Low Words
1288 1288    void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1289 1289    void pshuflw(XMMRegister dst, Address src,     int mode);
1290 1290  
1291 1291    // Shift Right Logical Quadword Immediate
1292 1292    void psrlq(XMMRegister dst, int shift);
1293 1293  
1294 1294    // Logical Compare Double Quadword
1295 1295    void ptest(XMMRegister dst, XMMRegister src);
1296 1296    void ptest(XMMRegister dst, Address src);
1297 1297  
1298 1298    // Interleave Low Bytes
1299 1299    void punpcklbw(XMMRegister dst, XMMRegister src);
1300 1300  
1301 1301  #ifndef _LP64 // no 32bit push/pop on amd64
1302 1302    void pushl(Address src);
1303 1303  #endif
1304 1304  
1305 1305    void pushq(Address src);
1306 1306  
1307 1307    // Xor Packed Byte Integer Values
1308 1308    void pxor(XMMRegister dst, Address src);
1309 1309    void pxor(XMMRegister dst, XMMRegister src);
1310 1310  
1311 1311    void rcll(Register dst, int imm8);
1312 1312  
1313 1313    void rclq(Register dst, int imm8);
1314 1314  
1315 1315    void ret(int imm16);
1316 1316  
1317 1317    void sahf();
1318 1318  
1319 1319    void sarl(Register dst, int imm8);
1320 1320    void sarl(Register dst);
1321 1321  
1322 1322    void sarq(Register dst, int imm8);
1323 1323    void sarq(Register dst);
1324 1324  
1325 1325    void sbbl(Address dst, int32_t imm32);
1326 1326    void sbbl(Register dst, int32_t imm32);
1327 1327    void sbbl(Register dst, Address src);
1328 1328    void sbbl(Register dst, Register src);
1329 1329  
1330 1330    void sbbq(Address dst, int32_t imm32);
1331 1331    void sbbq(Register dst, int32_t imm32);
1332 1332    void sbbq(Register dst, Address src);
1333 1333    void sbbq(Register dst, Register src);
1334 1334  
1335 1335    void setb(Condition cc, Register dst);
1336 1336  
1337 1337    void shldl(Register dst, Register src);
1338 1338  
1339 1339    void shll(Register dst, int imm8);
1340 1340    void shll(Register dst);
1341 1341  
1342 1342    void shlq(Register dst, int imm8);
1343 1343    void shlq(Register dst);
1344 1344  
1345 1345    void shrdl(Register dst, Register src);
1346 1346  
1347 1347    void shrl(Register dst, int imm8);
1348 1348    void shrl(Register dst);
1349 1349  
1350 1350    void shrq(Register dst, int imm8);
1351 1351    void shrq(Register dst);
1352 1352  
1353 1353    void smovl(); // QQQ generic?
1354 1354  
1355 1355    // Compute Square Root of Scalar Double-Precision Floating-Point Value
1356 1356    void sqrtsd(XMMRegister dst, Address src);
1357 1357    void sqrtsd(XMMRegister dst, XMMRegister src);
1358 1358  
1359 1359    // Compute Square Root of Scalar Single-Precision Floating-Point Value
1360 1360    void sqrtss(XMMRegister dst, Address src);
1361 1361    void sqrtss(XMMRegister dst, XMMRegister src);
1362 1362  
1363 1363    void std() { emit_byte(0xfd); }  // STD (set direction flag): emitted inline as the single opcode byte 0xFD
1364 1364  
1365 1365    void stmxcsr( Address dst );
1366 1366  
1367 1367    void subl(Address dst, int32_t imm32);
1368 1368    void subl(Address dst, Register src);
1369 1369    void subl(Register dst, int32_t imm32);
1370 1370    void subl(Register dst, Address src);
1371 1371    void subl(Register dst, Register src);
1372 1372  
1373 1373    void subq(Address dst, int32_t imm32);
1374 1374    void subq(Address dst, Register src);
1375 1375    void subq(Register dst, int32_t imm32);
1376 1376    void subq(Register dst, Address src);
1377 1377    void subq(Register dst, Register src);
1378 1378  
1379 1379  
1380 1380    // Subtract Scalar Double-Precision Floating-Point Values
1381 1381    void subsd(XMMRegister dst, Address src);
1382 1382    void subsd(XMMRegister dst, XMMRegister src);
1383 1383  
1384 1384    // Subtract Scalar Single-Precision Floating-Point Values
1385 1385    void subss(XMMRegister dst, Address src);
1386 1386    void subss(XMMRegister dst, XMMRegister src);
1387 1387  
1388 1388    void testb(Register dst, int imm8);
1389 1389  
1390 1390    void testl(Register dst, int32_t imm32);
1391 1391    void testl(Register dst, Register src);
1392 1392    void testl(Register dst, Address src);
1393 1393  
1394 1394    void testq(Register dst, int32_t imm32);
1395 1395    void testq(Register dst, Register src);
1396 1396  
1397 1397  
1398 1398    // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1399 1399    void ucomisd(XMMRegister dst, Address src);
1400 1400    void ucomisd(XMMRegister dst, XMMRegister src);
1401 1401  
1402 1402    // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1403 1403    void ucomiss(XMMRegister dst, Address src);
1404 1404    void ucomiss(XMMRegister dst, XMMRegister src);
1405 1405  
1406 1406    void xaddl(Address dst, Register src);
1407 1407  
1408 1408    void xaddq(Address dst, Register src);
1409 1409  
1410 1410    void xchgl(Register reg, Address adr);
1411 1411    void xchgl(Register dst, Register src);
1412 1412  
1413 1413    void xchgq(Register reg, Address adr);
1414 1414    void xchgq(Register dst, Register src);
1415 1415  
1416 1416    void xorl(Register dst, int32_t imm32);
1417 1417    void xorl(Register dst, Address src);
1418 1418    void xorl(Register dst, Register src);
1419 1419  
1420 1420    void xorq(Register dst, Address src);
1421 1421    void xorq(Register dst, Register src);
1422 1422  
1423 1423    // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1424 1424    void xorpd(XMMRegister dst, Address src);
1425 1425    void xorpd(XMMRegister dst, XMMRegister src);
1426 1426  
1427 1427    // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1428 1428    void xorps(XMMRegister dst, Address src);
1429 1429    void xorps(XMMRegister dst, XMMRegister src);
1430 1430  
1431 1431    void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1432 1432  };

↓ open down ↓

1432 lines elided

↑ open up ↑

1433 1433  
1434 1434  
1435 1435  // MacroAssembler extends Assembler by frequently used macros.
1436 1436  //
1437 1437  // Instructions for which a 'better' code sequence exists depending
1438 1438  // on arguments should also go in here.
1439 1439  
1440 1440  class MacroAssembler: public Assembler {
1441 1441    friend class LIR_Assembler;
1442 1442    friend class Runtime1;      // as_Address()
     1443 +
1443 1444   protected:
1444 1445  
1445 1446    Address as_Address(AddressLiteral adr);
1446 1447    Address as_Address(ArrayAddress adr);
1447 1448  
1448 1449    // Support for VM calls
1449 1450    //
1450 1451    // This is the base routine called by the different versions of call_VM_leaf. The interpreter
1451 1452    // may customize this version by overriding it for its purposes (e.g., to save/restore
1452 1453    // additional registers when doing a VM call).

1453 1454  #ifdef CC_INTERP
1454 1455    // c++ interpreter never wants to use interp_masm version of call_VM
1455 1456    #define VIRTUAL
1456 1457  #else
1457 1458    #define VIRTUAL virtual
1458 1459  #endif
1459 1460  
1460 1461    VIRTUAL void call_VM_leaf_base(
1461 1462      address entry_point,               // the entry point
1462 1463      int     number_of_arguments        // the number of arguments to pop after the call
1463 1464    );
1464 1465  
1465 1466    // This is the base routine called by the different versions of call_VM. The interpreter
1466 1467    // may customize this version by overriding it for its purposes (e.g., to save/restore
1467 1468    // additional registers when doing a VM call).
1468 1469    //
1469 1470    // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
1470 1471    // returns the register which contains the thread upon return. If a thread register has been
1471 1472    // specified, the return value will correspond to that register. If no last_java_sp is specified
1472 1473    // (noreg) then rsp will be used instead.
1473 1474    VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
1474 1475      Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
1475 1476      Register java_thread,              // the thread if computed before     ; use noreg otherwise
1476 1477      Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
1477 1478      address  entry_point,              // the entry point
1478 1479      int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
1479 1480      bool     check_exceptions          // whether to check for pending exceptions after return
1480 1481    );
1481 1482  
1482 1483    // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
1483 1484    // The implementation is only non-empty for the InterpreterMacroAssembler,
1484 1485    // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
1485 1486    virtual void check_and_handle_popframe(Register java_thread);
1486 1487    virtual void check_and_handle_earlyret(Register java_thread);
1487 1488  
1488 1489    void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
1489 1490  
1490 1491    // helpers for FPU flag access
1491 1492    // tmp is a temporary register, if none is available use noreg
1492 1493    void save_rax   (Register tmp);
1493 1494    void restore_rax(Register tmp);
1494 1495  
1495 1496   public:
1496 1497    MacroAssembler(CodeBuffer* code) : Assembler(code) {}
1497 1498  
1498 1499    // Support for NULL-checks
1499 1500    //
1500 1501    // Generates code that causes a NULL OS exception if the content of reg is NULL.
1501 1502    // If the accessed location is M[reg + offset] and the offset is known, provide the
1502 1503    // offset. No explicit code generation is needed if the offset is within a certain
1503 1504    // range (0 <= offset <= page_size).
1504 1505  
1505 1506    void null_check(Register reg, int offset = -1);
1506 1507    static bool needs_explicit_null_check(intptr_t offset);
1507 1508  
1508 1509    // Required platform-specific helpers for Label::patch_instructions.
1509 1510    // They _shadow_ the declarations in AbstractAssembler, which are undefined.
1510 1511    void pd_patch_instruction(address branch, address target);
1511 1512  #ifndef PRODUCT
1512 1513    static void pd_print_patched_instruction(address branch);
1513 1514  #endif
1514 1515  
1515 1516    // The following 4 methods return the offset of the appropriate move instruction
1516 1517  
1517 1518    // Support for fast byte/short loading with zero extension (depending on particular CPU)
1518 1519    int load_unsigned_byte(Register dst, Address src);
1519 1520    int load_unsigned_short(Register dst, Address src);
1520 1521  
1521 1522    // Support for fast byte/short loading with sign extension (depending on particular CPU)
1522 1523    int load_signed_byte(Register dst, Address src);
1523 1524    int load_signed_short(Register dst, Address src);
1524 1525  
1525 1526    // Support for sign-extension (hi:lo = extend_sign(lo))
1526 1527    void extend_sign(Register hi, Register lo);
1527 1528  
1528 1529    // Load and store values by size and signed-ness
1529 1530    void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
1530 1531    void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
1531 1532  
1532 1533    // Support for inc/dec with optimal instruction selection depending on value
1533 1534  
1534 1535    void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
1535 1536    void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
1536 1537  
1537 1538    void decrementl(Address dst, int value = 1);
1538 1539    void decrementl(Register reg, int value = 1);
1539 1540  
1540 1541    void decrementq(Register reg, int value = 1);
1541 1542    void decrementq(Address dst, int value = 1);
1542 1543  
1543 1544    void incrementl(Address dst, int value = 1);
1544 1545    void incrementl(Register reg, int value = 1);
1545 1546  
1546 1547    void incrementq(Register reg, int value = 1);
1547 1548    void incrementq(Address dst, int value = 1);
1548 1549  
1549 1550  
1550 1551    // Support optimal SSE move instructions.
1551 1552    void movflt(XMMRegister dst, XMMRegister src) {
1552 1553      if (UseXmmRegToRegMoveAll) { movaps(dst, src); }  // flag on: use the packed move
1553 1554      else                       { movss (dst, src); }  // flag off: use the scalar move
1554 1555    }
1555 1556    void movflt(XMMRegister dst, Address src) { movss(dst, src); }
1556 1557    void movflt(XMMRegister dst, AddressLiteral src);
1557 1558    void movflt(Address dst, XMMRegister src) { movss(dst, src); }
1558 1559  
1559 1560    void movdbl(XMMRegister dst, XMMRegister src) {
1560 1561      if (UseXmmRegToRegMoveAll) { movapd(dst, src); }  // flag on: use the packed move
1561 1562      else                       { movsd (dst, src); }  // flag off: use the scalar move
1562 1563    }
1563 1564  
1564 1565    void movdbl(XMMRegister dst, AddressLiteral src);
1565 1566  
1566 1567    void movdbl(XMMRegister dst, Address src) {
1567 1568      if (UseXmmLoadAndClearUpper) { movsd (dst, src); }  // flag on: load with movsd
1568 1569      else                         { movlpd(dst, src); }  // flag off: load with movlpd
1569 1570    }
1570 1571    void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
1571 1572  
1572 1573    void incrementl(AddressLiteral dst);
1573 1574    void incrementl(ArrayAddress dst);
1574 1575  
1575 1576    // Alignment
1576 1577    void align(int modulus);
1577 1578  
1578 1579    // Misc
1579 1580    void fat_nop(); // 5 byte nop
1580 1581  
1581 1582    // Stack frame creation/removal
1582 1583    void enter();
1583 1584    void leave();
1584 1585  
1585 1586    // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
1586 1587    // The pointer will be loaded into the thread register.
1587 1588    void get_thread(Register thread);
1588 1589  
1589 1590  
1590 1591    // Support for VM calls
1591 1592    //
1592 1593    // It is imperative that all calls into the VM are handled via the call_VM macros.
1593 1594    // They make sure that the stack linkage is set up correctly. call_VM's correspond
1594 1595    // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
1595 1596  
1596 1597  
1597 1598    void call_VM(Register oop_result,
1598 1599                 address entry_point,
1599 1600                 bool check_exceptions = true);
1600 1601    void call_VM(Register oop_result,
1601 1602                 address entry_point,
1602 1603                 Register arg_1,
1603 1604                 bool check_exceptions = true);
1604 1605    void call_VM(Register oop_result,
1605 1606                 address entry_point,
1606 1607                 Register arg_1, Register arg_2,
1607 1608                 bool check_exceptions = true);
1608 1609    void call_VM(Register oop_result,
1609 1610                 address entry_point,
1610 1611                 Register arg_1, Register arg_2, Register arg_3,
1611 1612                 bool check_exceptions = true);
1612 1613  
1613 1614    // Overloadings with last_Java_sp
1614 1615    void call_VM(Register oop_result,
1615 1616                 Register last_java_sp,
1616 1617                 address entry_point,
1617 1618                 int number_of_arguments = 0,
1618 1619                 bool check_exceptions = true);
1619 1620    void call_VM(Register oop_result,
1620 1621                 Register last_java_sp,
1621 1622                 address entry_point,
1622 1623                 Register arg_1,
1623 1624                 bool check_exceptions = true);
1624 1625    void call_VM(Register oop_result,
1625 1626                 Register last_java_sp,
1626 1627                 address entry_point,
1627 1628                 Register arg_1, Register arg_2,
1628 1629                 bool check_exceptions = true);
1629 1630    void call_VM(Register oop_result,
1630 1631                 Register last_java_sp,
1631 1632                 address entry_point,
1632 1633                 Register arg_1, Register arg_2, Register arg_3,
1633 1634                 bool check_exceptions = true);
1634 1635  
1635 1636    void call_VM_leaf(address entry_point,
1636 1637                      int number_of_arguments = 0);
1637 1638    void call_VM_leaf(address entry_point,
1638 1639                      Register arg_1);
1639 1640    void call_VM_leaf(address entry_point,
1640 1641                      Register arg_1, Register arg_2);
1641 1642    void call_VM_leaf(address entry_point,
1642 1643                      Register arg_1, Register arg_2, Register arg_3);
1643 1644  
1644 1645    // last Java Frame (fills frame anchor)
1645 1646    void set_last_Java_frame(Register thread,
1646 1647                             Register last_java_sp,
1647 1648                             Register last_java_fp,
1648 1649                             address last_java_pc);
1649 1650  
1650 1651    // thread in the default location (r15_thread on 64bit)
1651 1652    void set_last_Java_frame(Register last_java_sp,
1652 1653                             Register last_java_fp,
1653 1654                             address last_java_pc);

↓ open down ↓

201 lines elided

↑ open up ↑

1654 1655  
1655 1656    void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
1656 1657  
1657 1658    // thread in the default location (r15_thread on 64bit)
1658 1659    void reset_last_Java_frame(bool clear_fp, bool clear_pc);
1659 1660  
1660 1661    // Stores
1661 1662    void store_check(Register obj);                // store check for obj - register is destroyed afterwards
1662 1663    void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
1663 1664  
     1665 +#ifndef SERIALGC
     1666 +  
1664 1667    void g1_write_barrier_pre(Register obj,
1665      -#ifndef _LP64
     1668 +                            Register pre_val,
1666 1669                              Register thread,
1667      -#endif
1668 1670                              Register tmp,
1669      -                            Register tmp2,
1670      -                            bool     tosca_live);
     1671 +                            bool tosca_live,
     1672 +                            bool expand_call);
     1673 +
1671 1674    void g1_write_barrier_post(Register store_addr,
1672 1675                               Register new_val,
1673      -#ifndef _LP64
1674 1676                               Register thread,
1675      -#endif
1676 1677                               Register tmp,
1677 1678                               Register tmp2);
1678 1679  
     1680 +#endif // SERIALGC
1679 1681  
1680 1682    // split store_check(Register obj) to enhance instruction interleaving
1681 1683    void store_check_part_1(Register obj);
1682 1684    void store_check_part_2(Register obj);
1683 1685  
1684 1686    // C 'boolean' to Java boolean: x == 0 ? 0 : 1
1685 1687    void c2bool(Register x);
1686 1688  
1687 1689    // C++ bool manipulation
1688 1690

1689 1691    void movbool(Register dst, Address src);
1690 1692    void movbool(Address dst, bool boolconst);
1691 1693    void movbool(Address dst, Register src);
1692 1694    void testbool(Register dst);
1693 1695  
1694 1696    // oop manipulations
1695 1697    void load_klass(Register dst, Register src);
1696 1698    void store_klass(Register dst, Register src);
1697 1699  
1698 1700    void load_heap_oop(Register dst, Address src);
1699 1701    void store_heap_oop(Address dst, Register src);
1700 1702  
1701 1703    // Used for storing NULL. All other oop constants should be
1702 1704    // stored using routines that take a jobject.
1703 1705    void store_heap_oop_null(Address dst);
1704 1706  
1705 1707    void load_prototype_header(Register dst, Register src);
1706 1708  
1707 1709  #ifdef _LP64
1708 1710    void store_klass_gap(Register dst, Register src);
1709 1711  
1710 1712    // This dummy is to prevent a call to store_heap_oop from
1711 1713    // converting a zero (like NULL) into a Register by giving
1712 1714    // the compiler two choices it can't resolve
1713 1715  
1714 1716    void store_heap_oop(Address dst, void* dummy);
1715 1717  
1716 1718    void encode_heap_oop(Register r);
1717 1719    void decode_heap_oop(Register r);
1718 1720    void encode_heap_oop_not_null(Register r);
1719 1721    void decode_heap_oop_not_null(Register r);
1720 1722    void encode_heap_oop_not_null(Register dst, Register src);
1721 1723    void decode_heap_oop_not_null(Register dst, Register src);
1722 1724  
1723 1725    void set_narrow_oop(Register dst, jobject obj);
1724 1726    void set_narrow_oop(Address dst, jobject obj);
1725 1727    void cmp_narrow_oop(Register dst, jobject obj);
1726 1728    void cmp_narrow_oop(Address dst, jobject obj);
1727 1729  
1728 1730    // if heap base register is used - reinit it with the correct value
1729 1731    void reinit_heapbase();
1730 1732  
1731 1733    DEBUG_ONLY(void verify_heapbase(const char* msg);)
1732 1734  
1733 1735  #endif // _LP64
1734 1736  
1735 1737    // Int division/remainder for Java
1736 1738    // (as idivl, but checks for special case as described in JVM spec.)
1737 1739    // returns idivl instruction offset for implicit exception handling
1738 1740    int corrected_idivl(Register reg);
1739 1741  
1740 1742    // Long division/remainder for Java
1741 1743    // (as idivq, but checks for special case as described in JVM spec.)
1742 1744    // returns idivq instruction offset for implicit exception handling
1743 1745    int corrected_idivq(Register reg);
1744 1746  
1745 1747    void int3();
1746 1748  
1747 1749    // Long operation macros for a 32bit cpu
1748 1750    // Long negation for Java
1749 1751    void lneg(Register hi, Register lo);
1750 1752  
1751 1753    // Long multiplication for Java
1752 1754    // (destroys contents of eax, ebx, ecx and edx)
1753 1755    void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
1754 1756  
1755 1757    // Long shifts for Java
1756 1758    // (semantics as described in JVM spec.)
1757 1759    void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
1758 1760    void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
1759 1761  
1760 1762    // Long compare for Java
1761 1763    // (semantics as described in JVM spec.)
1762 1764    void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
1763 1765  
1764 1766  
1765 1767    // misc
1766 1768  
1767 1769    // Sign extension
1768 1770    void sign_extend_short(Register reg);
1769 1771    void sign_extend_byte(Register reg);
1770 1772  
1771 1773    // Division by power of 2, rounding towards 0
1772 1774    void division_with_shift(Register reg, int shift_value);
1773 1775  
1774 1776    // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
1775 1777    //
1776 1778    // CF (corresponds to C0) if x < y
1777 1779    // PF (corresponds to C2) if unordered
1778 1780    // ZF (corresponds to C3) if x = y
1779 1781    //
1780 1782    // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
1781 1783    // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
1782 1784    void fcmp(Register tmp);
1783 1785    // Variant of the above which allows y to be further down the stack
1784 1786    // and which only pops x and y if specified. If pop_right is
1785 1787    // specified then pop_left must also be specified.
1786 1788    void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
1787 1789  
1788 1790    // Floating-point comparison for Java
1789 1791    // Compares the top-most stack entries on the FPU stack and stores the result in dst.
1790 1792    // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
1791 1793    // (semantics as described in JVM spec.)
1792 1794    void fcmp2int(Register dst, bool unordered_is_less);
1793 1795    // Variant of the above which allows y to be further down the stack
1794 1796    // and which only pops x and y if specified. If pop_right is
1795 1797    // specified then pop_left must also be specified.
1796 1798    void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
1797 1799  
1798 1800    // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
1799 1801    // tmp is a temporary register, if none is available use noreg
1800 1802    void fremr(Register tmp);
1801 1803  
1802 1804  
1803 1805    // same as fcmp2int, but using SSE2
1804 1806    void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
1805 1807    void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
1806 1808  
1807 1809    // Inlined sin/cos generator for Java; must not use CPU instruction
1808 1810    // directly on Intel as it does not have high enough precision
1809 1811    // outside of the range [-pi/4, pi/4]. The extra argument indicates the
1810 1812    // number of FPU stack slots in use; all but the topmost will
1811 1813    // require saving if a slow case is necessary. Assumes argument is
1812 1814    // on FP TOS; result is on FP TOS.  No cpu registers are changed by
1813 1815    // this code.
1814 1816    void trigfunc(char trig, int num_fpu_regs_in_use = 1);
1815 1817  
1816 1818    // branch to L if FPU flag C2 is set/not set
1817 1819    // tmp is a temporary register, if none is available use noreg
1818 1820    void jC2 (Register tmp, Label& L);
1819 1821    void jnC2(Register tmp, Label& L);
1820 1822  
1821 1823    // Pop ST (ffree & fincstp combined)
1822 1824    void fpop();
1823 1825  
1824 1826    // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
1825 1827    void push_fTOS();
1826 1828  
1827 1829    // pops double TOS element from CPU stack and pushes on FPU stack
1828 1830    void pop_fTOS();
1829 1831  
1830 1832    void empty_FPU_stack();
1831 1833  
1832 1834    void push_IU_state();
1833 1835    void pop_IU_state();
1834 1836  
1835 1837    void push_FPU_state();
1836 1838    void pop_FPU_state();
1837 1839  
1838 1840    void push_CPU_state();
1839 1841    void pop_CPU_state();
1840 1842  
1841 1843    // Round reg up to a multiple of modulus (modulus is expected to be a power of two)
1842 1844    void round_to(Register reg, int modulus);
1843 1845  
1844 1846    // Callee saved registers handling
1845 1847    void push_callee_saved_registers();
1846 1848    void pop_callee_saved_registers();
1847 1849  
1848 1850    // allocation
1849 1851    void eden_allocate(
1850 1852      Register obj,                      // result: pointer to object after successful allocation
1851 1853      Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
1852 1854      int      con_size_in_bytes,        // object size in bytes if   known at compile time
1853 1855      Register t1,                       // temp register
1854 1856      Label&   slow_case                 // continuation point if fast allocation fails
1855 1857    );
1856 1858    void tlab_allocate(
1857 1859      Register obj,                      // result: pointer to object after successful allocation
1858 1860      Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
1859 1861      int      con_size_in_bytes,        // object size in bytes if   known at compile time
1860 1862      Register t1,                       // temp register
1861 1863      Register t2,                       // temp register
1862 1864      Label&   slow_case                 // continuation point if fast allocation fails
1863 1865    );
1864 1866    Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
1865 1867    void incr_allocated_bytes(Register thread,
1866 1868                              Register var_size_in_bytes, int con_size_in_bytes,
1867 1869                              Register t1 = noreg);
1868 1870  
1869 1871    // interface method calling
1870 1872    void lookup_interface_method(Register recv_klass,
1871 1873                                 Register intf_klass,
1872 1874                                 RegisterOrConstant itable_index,
1873 1875                                 Register method_result,
1874 1876                                 Register scan_temp,
1875 1877                                 Label& no_such_interface);
1876 1878  
1877 1879    // Test sub_klass against super_klass, with fast and slow paths.
1878 1880  
1879 1881    // The fast path produces a tri-state answer: yes / no / maybe-slow.
1880 1882    // One of the three labels can be NULL, meaning take the fall-through.
1881 1883    // If super_check_offset is -1, the value is loaded up from super_klass.
1882 1884    // No registers are killed, except temp_reg.
1883 1885    void check_klass_subtype_fast_path(Register sub_klass,
1884 1886                                       Register super_klass,
1885 1887                                       Register temp_reg,
1886 1888                                       Label* L_success,
1887 1889                                       Label* L_failure,
1888 1890                                       Label* L_slow_path,
1889 1891                  RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
1890 1892  
1891 1893    // The rest of the type check; must be wired to a corresponding fast path.
1892 1894    // It does not repeat the fast path logic, so don't use it standalone.
1893 1895    // The temp_reg and temp2_reg can be noreg, if no temps are available.
1894 1896    // Updates the sub's secondary super cache as necessary.
1895 1897    // If set_cond_codes, condition codes will be Z on success, NZ on failure.
1896 1898    void check_klass_subtype_slow_path(Register sub_klass,
1897 1899                                       Register super_klass,
1898 1900                                       Register temp_reg,
1899 1901                                       Register temp2_reg,
1900 1902                                       Label* L_success,
1901 1903                                       Label* L_failure,
1902 1904                                       bool set_cond_codes = false);
1903 1905  
1904 1906    // Simplified, combined version, good for typical uses.
1905 1907    // Falls through on failure.
1906 1908    void check_klass_subtype(Register sub_klass,
1907 1909                             Register super_klass,
1908 1910                             Register temp_reg,
1909 1911                             Label& L_success);
1910 1912  
1911 1913    // method handles (JSR 292)
1912 1914    void check_method_handle_type(Register mtype_reg, Register mh_reg,
1913 1915                                  Register temp_reg,
1914 1916                                  Label& wrong_method_type);
1915 1917    void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
1916 1918                                    Register temp_reg);
1917 1919    void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
1918 1920    Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
1919 1921  
1920 1922  
1921 1923    //----
1922 1924    void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
1923 1925  
1924 1926    // Debugging
1925 1927  
1926 1928    // only if +VerifyOops
1927 1929    void verify_oop(Register reg, const char* s = "broken oop");
1928 1930    void verify_oop_addr(Address addr, const char * s = "broken oop addr");
1929 1931  
1930 1932    // only if +VerifyFPU
1931 1933    void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
1932 1934  
1933 1935    // prints msg, dumps registers and stops execution
1934 1936    void stop(const char* msg);
1935 1937  
1936 1938    // prints msg and continues
1937 1939    void warn(const char* msg);
1938 1940  
1939 1941    static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
1940 1942    static void debug64(char* msg, int64_t pc, int64_t regs[]);
1941 1943  
1942 1944    void os_breakpoint();
1943 1945  
1944 1946    void untested()                                { stop("untested"); }
1945 1947  
1946 1948    void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); } // NOTE(review): b is never freed here; presumably stop() keeps the pointer for later use, so confirm before treating this as a leak
1947 1949  
1948 1950    void should_not_reach_here()                   { stop("should not reach here"); }
1949 1951  
1950 1952    void print_CPU_state();
1951 1953  
1952 1954    // Stack overflow checking
1953 1955    void bang_stack_with_offset(int offset) {
1954 1956      // Stack grows down: the caller passes a positive offset and the store below touches rsp - offset.
1955 1957      assert(offset > 0, "bang offset must be positive");
1956 1958      movl(Address(rsp, (-offset)), rax);  // the store to [rsp - offset] is the bang itself
1957 1959    }
1958 1960  
1959 1961    // Writes to stack successive pages until offset reached to check for
1960 1962    // stack overflow + shadow pages.  Also, clobbers tmp
1961 1963    void bang_stack_size(Register size, Register tmp);
1962 1964  
1963 1965    virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
1964 1966                                                  Register tmp,
1965 1967                                                  int offset);
1966 1968  
1967 1969    // Support for serializing memory accesses between threads
1968 1970    void serialize_memory(Register thread, Register tmp);
1969 1971  
1970 1972    void verify_tlab();
1971 1973  
1972 1974    // Biased locking support
1973 1975    // lock_reg and obj_reg must be loaded up with the appropriate values.
1974 1976    // swap_reg must be rax, and is killed.
1975 1977    // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
1976 1978    // be killed; if not supplied, push/pop will be used internally to
1977 1979    // allocate a temporary (inefficient, avoid if possible).
1978 1980    // Optional slow case is for implementations (interpreter and C1) which branch to
1979 1981    // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
1980 1982    // Returns offset of first potentially-faulting instruction for null
1981 1983    // check info (currently consumed only by C1). If
1982 1984    // swap_reg_contains_mark is true then returns -1 as it is assumed
1983 1985    // the calling code has already passed any potential faults.
1984 1986    int biased_locking_enter(Register lock_reg, Register obj_reg,
1985 1987                             Register swap_reg, Register tmp_reg,
1986 1988                             bool swap_reg_contains_mark,
1987 1989                             Label& done, Label* slow_case = NULL,
1988 1990                             BiasedLockingCounters* counters = NULL);
1989 1991    void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
1990 1992  
1991 1993  
1992 1994    Condition negate_condition(Condition cond);
1993 1995  
1994 1996    // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
1995 1997    // operands. In general the names are modified to avoid hiding the instruction in Assembler
1996 1998    // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
1997 1999    // here in MacroAssembler. The major exception to this rule is call.
1998 2000  
1999 2001    // Arithmetics
2000 2002  
2001 2003  
2002 2004    void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
2003 2005    void addptr(Address dst, Register src);
2004 2006  
2005 2007    void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
2006 2008    void addptr(Register dst, int32_t src);
2007 2009    void addptr(Register dst, Register src);
2008 2010  
2009 2011    void andptr(Register dst, int32_t src);
2010 2012    void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
2011 2013  
2012 2014    void cmp8(AddressLiteral src1, int imm);
2013 2015  
2014 2016    // renamed to drag out the casting of address to int32_t/intptr_t
2015 2017    void cmp32(Register src1, int32_t imm);
2016 2018  
2017 2019    void cmp32(AddressLiteral src1, int32_t imm);
2018 2020    // compare reg - mem, or reg - &mem
2019 2021    void cmp32(Register src1, AddressLiteral src2);
2020 2022  
2021 2023    void cmp32(Register src1, Address src2);
2022 2024  
2023 2025  #ifndef _LP64
2024 2026    void cmpoop(Address dst, jobject obj);
2025 2027    void cmpoop(Register dst, jobject obj);
2026 2028  #endif // _LP64
2027 2029  
2028 2030    // NOTE: src2 must be the lval. This is NOT a mem-mem compare.
2029 2031    void cmpptr(Address src1, AddressLiteral src2);
2030 2032  
2031 2033    void cmpptr(Register src1, AddressLiteral src2);
2032 2034  
2033 2035    void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2034 2036    void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2035 2037    // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2036 2038  
2037 2039    void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2038 2040    void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
2039 2041  
2040 2042    // cmp64 to avoid hiding cmpq
2041 2043    void cmp64(Register src1, AddressLiteral src);
2042 2044  
2043 2045    void cmpxchgptr(Register reg, Address adr);
2044 2046  
2045 2047    void locked_cmpxchgptr(Register reg, AddressLiteral adr);
2046 2048  
2047 2049  
2048 2050    void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
2049 2051  
2050 2052  
2051 2053    void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
2052 2054  
2053 2055    void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
2054 2056  
2055 2057    void shlptr(Register dst, int32_t shift);
2056 2058    void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
2057 2059  
2058 2060    void shrptr(Register dst, int32_t shift);
2059 2061    void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
2060 2062  
2061 2063    void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
2062 2064    void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
2063 2065  
2064 2066    void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
2065 2067  
2066 2068    void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
2067 2069    void subptr(Register dst, int32_t src);
2068 2070    void subptr(Register dst, Register src);
2069 2071  
2070 2072  
2071 2073    void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
2072 2074    void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
2073 2075  
2074 2076    void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
2075 2077    void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
2076 2078  
2077 2079    void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
2078 2080  
2079 2081  
2080 2082  
2081 2083    // Helper functions for statistics gathering.
2082 2084    // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
2083 2085    void cond_inc32(Condition cond, AddressLiteral counter_addr);
2084 2086    // Unconditional atomic increment.
2085 2087    void atomic_incl(AddressLiteral counter_addr);
2086 2088  
2087 2089    void lea(Register dst, AddressLiteral adr);
2088 2090    void lea(Address dst, AddressLiteral adr);
2089 2091    void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
2090 2092  
2091 2093    void leal32(Register dst, Address src) { leal(dst, src); }
2092 2094  
2093 2095    void test32(Register src1, AddressLiteral src2);
2094 2096  
2095 2097    void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2096 2098    void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2097 2099    void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
2098 2100  
2099 2101    void testptr(Register src, int32_t imm32) {  LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
2100 2102    void testptr(Register src1, Register src2);
2101 2103  
2102 2104    void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
2103 2105    void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
2104 2106  
2105 2107    // Calls
2106 2108  
2107 2109    void call(Label& L, relocInfo::relocType rtype);
2108 2110    void call(Register entry);
2109 2111  
2110 2112    // NOTE: this call transfers to the effective address of entry NOT
2111 2113    // the address contained by entry. This is because this is more natural
2112 2114    // for jumps/calls.
2113 2115    void call(AddressLiteral entry);
2114 2116  
2115 2117    // Jumps
2116 2118  
2117 2119    // NOTE: these jumps transfer to the effective address of dst NOT
2118 2120    // the address contained by dst. This is because this is more natural
2119 2121    // for jumps/calls.
2120 2122    void jump(AddressLiteral dst);
2121 2123    void jump_cc(Condition cc, AddressLiteral dst);
2122 2124  
2123 2125    // 32bit can do a case table jump in one instruction but we no longer allow the base
2124 2126    // to be installed in the Address class. This jump transfers to the address
2125 2127    // contained in the location described by entry (not the address of entry)
2126 2128    void jump(ArrayAddress entry);
2127 2129  
2128 2130    // Floating: the inline overloads below forward to the same-named Assembler:: instruction
2129 2131  
2130 2132    void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
2131 2133    void andpd(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2132 2134  
2133 2135    void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
2134 2136    void comiss(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2135 2137  
2136 2138    void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
2137 2139    void comisd(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2138 2140  
2139 2141    void fadd_s(Address src)        { Assembler::fadd_s(src); }
2140 2142    void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }  // literal is converted with as_Address() and forwarded inline
2141 2143  
2142 2144    void fldcw(Address src) { Assembler::fldcw(src); }
2143 2145    void fldcw(AddressLiteral src);  // declared only; defined out of line
2144 2146  
2145 2147    void fld_s(int index)   { Assembler::fld_s(index); }
2146 2148    void fld_s(Address src) { Assembler::fld_s(src); }
2147 2149    void fld_s(AddressLiteral src);  // declared only; defined out of line
2148 2150  
2149 2151    void fld_d(Address src) { Assembler::fld_d(src); }
2150 2152    void fld_d(AddressLiteral src);  // declared only; defined out of line
2151 2153  
2152 2154    void fld_x(Address src) { Assembler::fld_x(src); }
2153 2155    void fld_x(AddressLiteral src);  // declared only; defined out of line
2154 2156  
2155 2157    void fmul_s(Address src)        { Assembler::fmul_s(src); }
2156 2158    void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }  // literal is converted with as_Address() and forwarded inline
2157 2159  
2158 2160    void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
2159 2161    void ldmxcsr(AddressLiteral src);  // declared only; defined out of line
2160 2162  
2161 2163  private:
2162 2164    // These are private because users should be calling movflt/movdbl instead.
2163 2165  
2164 2166    void movss(Address dst, XMMRegister src)     { Assembler::movss(dst, src); }
2165 2167    void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
2166 2168    void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
2167 2169    void movss(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2168 2170  
2169 2171    void movlpd(XMMRegister dst, Address src)      {Assembler::movlpd(dst, src); }
2170 2172    void movlpd(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2171 2173  
2172 2174  public:  // XMMRegister wrappers: register/Address forms forward to Assembler::, AddressLiteral forms go through as_Address()
2173 2175  
2174 2176    void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
2175 2177    void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
2176 2178    void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
2177 2179  
2178 2180    void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
2179 2181    void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
2180 2182    void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
2181 2183  
2182 2184    void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
2183 2185    void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
2184 2186    void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
2185 2187  
2186 2188    void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
2187 2189    void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
2188 2190    void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
2189 2191  
2190 2192    void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
2191 2193    void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }  // only overload in this group whose destination is an Address
2192 2194    void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
2193 2195    void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
2194 2196  
2195 2197    void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
2196 2198    void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
2197 2199    void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
2198 2200  
2199 2201    void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
2200 2202    void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
2201 2203    void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
2202 2204  
2203 2205    void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
2204 2206    void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
2205 2207    void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
2206 2208  
2207 2209    void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
2208 2210    void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
2209 2211    void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
2210 2212  
2211 2213    void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
2212 2214    void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
2213 2215    void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
2214 2216  
2215 2217    void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
2216 2218    void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
2217 2219    void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
2218 2220  
2219 2221    void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }  // forwards to Assembler::ucomiss
2220 2222    void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
2221 2223    void ucomiss(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2222 2224  
2223 2225    void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }  // forwards to Assembler::ucomisd
2224 2226    void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
2225 2227    void ucomisd(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2226 2228  
2227 2229    // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
2228 2230    void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
2229 2231    void xorpd(XMMRegister dst, Address src)     { Assembler::xorpd(dst, src); }
2230 2232    void xorpd(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2231 2233  
2232 2234    // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
2233 2235    void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
2234 2236    void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
2235 2237    void xorps(XMMRegister dst, AddressLiteral src);  // declared only; defined out of line
2236 2238  
2237 2239    // Data
2238 2240  
2239 2241    void cmov(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }  // body is identical to cmovptr(Condition, Register, Register) below
2240 2242  
2241 2243    void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }  // pointer-sized conditional move: cmovq under LP64_ONLY, cmovl under NOT_LP64
2242 2244    void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmovl(cc, dst, src)); }  // same selection, Register source
2243 2245  
2244 2246    void movoop(Register dst, jobject obj);  // movoop/movptr overloads in this group are declared only; defined out of line
2245 2247    void movoop(Address dst, jobject obj);
2246 2248  
2247 2249    void movptr(ArrayAddress dst, Register src);
2248 2250    // Open question from the original author: can this do an lea?
2249 2251    void movptr(Register dst, ArrayAddress src);
2250 2252  
2251 2253    void movptr(Register dst, Address src);
2252 2254  
2253 2255    void movptr(Register dst, AddressLiteral src);
2254 2256  
2255 2257    void movptr(Register dst, intptr_t src);
2256 2258    void movptr(Register dst, Register src);
2257 2259    void movptr(Address dst, intptr_t src);
2258 2260  
2259 2261    void movptr(Address dst, Register src);
2260 2262  
2261 2263  #ifdef _LP64
2262 2264    // Generally the next two are only used for moving NULL,
2263 2265    // although there are situations in initializing the mark word where
2264 2266    // they could be used. They are dangerous.
2265 2267  
2266 2268    // They are declared only under _LP64, where int32_t and intptr_t are distinct types;
2267 2269    // where the two types are the same these would be ambiguous with the intptr_t overloads.
2268 2270  
2269 2271    void movptr(Address dst, int32_t imm32);
2270 2272    void movptr(Register dst, int32_t imm32);
2271 2273  #endif // _LP64
2272 2274  
2273 2275    // Named mov32 (rather than movl) to avoid hiding movl. Declared only; defined out of line.
2274 2276    void mov32(AddressLiteral dst, Register src);
2275 2277    void mov32(Register dst, AddressLiteral src);
2276 2278  
2277 2279    // Named movbyte (rather than movb) to avoid hiding movb. Declared only; defined out of line.
2278 2280    void movbyte(ArrayAddress dst, int src);
2279 2281  
2280 2282    // Can push value or effective address (declared only; defined out of line)
2281 2283    void pushptr(AddressLiteral src);
2282 2284  
2283 2285    void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }  // pointer-sized push: pushq under LP64_ONLY, pushl under NOT_LP64
2284 2286    void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }  // pointer-sized pop: popq under LP64_ONLY, popl under NOT_LP64
2285 2287  
2286 2288    void pushoop(jobject obj);  // declared only; defined out of line
2287 2289  
2288 2290    // Move an l-sized value into a ptr-sized element, sign-extending as needed (movslq under LP64_ONLY, plain movl under NOT_LP64)
2289 2291    void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
2290 2292    void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }  // under NOT_LP64 the movl is skipped when dst == src
2291 2293  
2292 2294    // IndexOf strings. (Declaration only. NOTE(review): operand roles are implied by parameter names only; confirm against the definition.)
2293 2295    void string_indexof(Register str1, Register str2,
2294 2296                        Register cnt1, Register cnt2, Register result,
2295 2297                        XMMRegister vec, Register tmp);
2296 2298  
2297 2299    // Compare strings. (Declaration only; defined out of line.)
2298 2300    void string_compare(Register str1, Register str2,
2299 2301                        Register cnt1, Register cnt2, Register result,
2300 2302                        XMMRegister vec1);
2301 2303  
2302 2304    // Compare char[] arrays. (Declaration only. NOTE(review): the meaning of is_array_equ is not visible here; confirm in the definition.)
2303 2305    void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
2304 2306                            Register limit, Register result, Register chr,
2305 2307                            XMMRegister vec1, XMMRegister vec2);
2306 2308  
2307 2309    // Fill primitive arrays. (Declaration only. NOTE(review): presumably t is the element type and aligned describes the destination; confirm.)
2308 2310    void generate_fill(BasicType t, bool aligned,
2309 2311                       Register to, Register value, Register count,
2310 2312                       Register rtmp, XMMRegister xtmp);
2311 2313  
2312 2314  #undef VIRTUAL
2313 2315  
2314 2316  };
2315 2317  
2316 2318  /**
2317 2319   * class SkipIfEqual:
2318 2320   *
2319 2321   * Instantiating this class will result in assembly code being output that will
2320 2322   * jump around any code emitted between the creation of the instance and its
2321 2323   * automatic destruction at the end of a scope block, depending on the value of
2322 2324   * the flag passed to the constructor, which will be checked at run-time.
2323 2325   */
2324 2326  class SkipIfEqual {
2325 2327   private:
2326 2328    MacroAssembler* _masm;  // presumably the assembler handed to the constructor; confirm in the definition
2327 2329    Label _label;           // presumably the target of the emitted jump; confirm in the definition
2328 2330  
2329 2331   public:
2330 2332     SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);  // declared only; defined out of line
2331 2333     ~SkipIfEqual();  // declared only; defined out of line
2332 2334  };
2333 2335  
2334 2336  #ifdef ASSERT
2335 2337  inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }  // unconditionally returns true; compiled only when ASSERT is defined
2336 2338  #endif
2337 2339  
2338 2340  #endif // CPU_X86_VM_ASSEMBLER_X86_HPP

↓ open down ↓

650 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX