src/hotspot/cpu/x86/assembler_x86.hpp

 167   };
 168   static ScaleFactor times(int size) {
 169     assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
 170     if (size == 8)  return times_8;
 171     if (size == 4)  return times_4;
 172     if (size == 2)  return times_2;
 173     return times_1;
 174   }
 175   static int scale_size(ScaleFactor scale) {
 176     assert(scale != no_scale, "");
 177     assert(((1 << (int)times_1) == 1 &&
 178             (1 << (int)times_2) == 2 &&
 179             (1 << (int)times_4) == 4 &&
 180             (1 << (int)times_8) == 8), "");
 181     return (1 << (int)scale);
 182   }
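
A minimal usage sketch of the scale helpers above (register names assumed; not part of this change):

    Address a(rbx, rcx, Address::times(sizeof(jint)));  // sizeof(jint) == 4, so times_4
    int bytes = Address::scale_size(Address::times_8);  // recovers 8
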
 183 
 184  private:
 185   Register         _base;
 186   Register         _index;

 187   ScaleFactor      _scale;
 188   int              _disp;

 189   RelocationHolder _rspec;
 190 
 191   // Easily misused constructors; make them private
 192   // %%% can we make these go away?
 193   NOT_LP64(Address(address loc, RelocationHolder spec);)
 194   Address(int disp, address loc, relocInfo::relocType rtype);
 195   Address(int disp, address loc, RelocationHolder spec);
 196 
 197  public:
 198 
 199   int disp() { return _disp; }
 200   // creation
 201   Address()
 202     : _base(noreg),
 203       _index(noreg),

 204       _scale(no_scale),
 205       _disp(0) {

 206   }
 207 
 208   // No default displacement, otherwise Register can be implicitly
 209   // converted to 0(Register) which is quite a different animal.
 210 
 211   Address(Register base, int disp)
 212     : _base(base),
 213       _index(noreg),

 214       _scale(no_scale),
 215       _disp(disp) {

 216   }
 217 
 218   Address(Register base, Register index, ScaleFactor scale, int disp = 0)
 219     : _base (base),
 220       _index(index),

 221       _scale(scale),
 222       _disp (disp) {

 223     assert(!index->is_valid() == (scale == Address::no_scale),
 224            "inconsistent address");
 225   }
 226 
 227   Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
 228     : _base (base),
 229       _index(index.register_or_noreg()),

 230       _scale(scale),
 231       _disp (disp + (index.constant_or_zero() * scale_size(scale))) {

 232     if (!index.is_register())  scale = Address::no_scale;
 233     assert(!_index->is_valid() == (scale == Address::no_scale),
 234            "inconsistent address");
 235   }
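
A sketch of how the RegisterOrConstant constructor above folds a constant index into the displacement (values assumed for illustration):

    RegisterOrConstant idx((intptr_t) 8);        // constant index, no register
    Address a(rbx, idx, Address::times_4, 16);   // _disp == 16 + 8*4 == 48; _index stays noreg
                                                 // and the effective scale drops to no_scale
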
 236 











 237   Address plus_disp(int disp) const {
 238     Address a = (*this);
 239     a._disp += disp;
 240     return a;
 241   }
 242   Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
 243     Address a = (*this);
 244     a._disp += disp.constant_or_zero() * scale_size(scale);
 245     if (disp.is_register()) {
 246       assert(!a.index()->is_valid(), "competing indexes");
 247       a._index = disp.as_register();
 248       a._scale = scale;
 249     }
 250     return a;
 251   }
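
plus_disp builds a new Address rather than mutating the receiver; a brief sketch (registers assumed):

    Address base(rsi, 0);
    Address elem = base.plus_disp(RegisterOrConstant(rcx), Address::times_8);  // installs rcx as index
    Address next = elem.plus_disp(8);                                          // bumps the displacement by 8
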
 252   bool is_same_address(Address a) const {
 253     // disregard _rspec
 254     return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
 255   }
 256 
 257   // The following overloads are used in connection with the
 258   // ByteSize type (see sizes.hpp).  They simplify the use of
 259   // ByteSize'd arguments in assembly code. Note that their equivalents
 260   // for the optimized build are the member functions with an int disp
 261   // argument, since ByteSize is mapped to an int type in that case.
 262   //
 263   // Note: DO NOT introduce similar overloaded functions for WordSize
 264   // arguments as in the optimized mode, both ByteSize and WordSize
 265   // are mapped to the same type and thus the compiler cannot make a
 266   // distinction anymore (=> compiler errors).
 267 
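
In a debug build the ByteSize constructors below are selected for offsets expressed as ByteSize; a sketch (the offset accessor is hypothetical):

    // some_offset() stands in for any accessor returning a ByteSize.
    Address field(r15_thread, JavaThread::some_offset());  // picks Address(Register, ByteSize)
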
 268 #ifdef ASSERT
 269   Address(Register base, ByteSize disp)
 270     : _base(base),
 271       _index(noreg),

 272       _scale(no_scale),
 273       _disp(in_bytes(disp)) {

 274   }
 275 
 276   Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
 277     : _base(base),
 278       _index(index),

 279       _scale(scale),
 280       _disp(in_bytes(disp)) {

 281     assert(!index->is_valid() == (scale == Address::no_scale),
 282            "inconsistent address");
 283   }
 284 
 285   Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
 286     : _base (base),
 287       _index(index.register_or_noreg()),

 288       _scale(scale),
 289       _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {

 290     if (!index.is_register())  scale = Address::no_scale;
 291     assert(!_index->is_valid() == (scale == Address::no_scale),
 292            "inconsistent address");
 293   }
 294 
 295 #endif // ASSERT
 296 
 297   // accessors
 298   bool        uses(Register reg) const { return _base == reg || _index == reg; }
 299   Register    base()             const { return _base;  }
 300   Register    index()            const { return _index; }

 301   ScaleFactor scale()            const { return _scale; }
 302   int         disp()             const { return _disp;  }

 303 
 304   // Convert the raw encoding form into the form expected by the constructor for
 305   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 306   // that to noreg for the Address constructor.
 307   static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
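
A sketch of the rsp-index convention make_raw handles (raw encodings assumed; rsp encodes as 4):

    Address a = Address::make_raw(rax->encoding(), 4 /* rsp => no index */,
                                  0, 16, relocInfo::none);
    // equivalent to Address(rax, 16)
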
 308 
 309   static Address make_array(ArrayAddress);
 310 
 311  private:
 312   bool base_needs_rex() const {
 313     return _base != noreg && _base->encoding() >= 8;
 314   }
 315 
 316   bool index_needs_rex() const {
 317     return _index != noreg && _index->encoding() >= 8;
 318   }
 319 




 320   relocInfo::relocType reloc() const { return _rspec.type(); }
 321 
 322   friend class Assembler;
 323   friend class MacroAssembler;
 324   friend class LIR_Assembler; // base/index/scale/disp
 325 };
 326 
 327 //
 328 // AddressLiteral has been split out from Address because operands of this type
 329 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 330 // the few instructions that need to deal with address literals are unique and the
 331 // MacroAssembler does not have to implement every instruction in the Assembler
 332 // in order to search for address literals that may need special handling depending
 333 // on the instruction and the platform. It is also a small step on the way to merging
 334 // the i486/amd64 directories.
 335 //
 336 class AddressLiteral {
 337   friend class ArrayAddress;
 338   RelocationHolder _rspec;
 339   // Typically, when we use AddressLiterals, we want to use their rval


 666 
 667   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
 668                              VexOpcode opc, InstructionAttr *attributes);
 669 
 670   // Helper functions for groups of instructions
 671   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 672 
 673   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 674   // Force generation of a 4-byte immediate value even if it fits into 8 bits
 675   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
 676   void emit_arith(int op1, int op2, Register dst, Register src);
 677 
 678   bool emit_compressed_disp_byte(int &disp);
 679 
 680   void emit_operand(Register reg,
 681                     Register base, Register index, Address::ScaleFactor scale,
 682                     int disp,
 683                     RelocationHolder const& rspec,
 684                     int rip_relative_correction = 0);
 685 




 686   void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 687 
 688   // operands that only take the original 32bit registers
 689   void emit_operand32(Register reg, Address adr);
 690 
 691   void emit_operand(XMMRegister reg,
 692                     Register base, Register index, Address::ScaleFactor scale,
 693                     int disp,
 694                     RelocationHolder const& rspec);
 695 
 696   void emit_operand(XMMRegister reg, Address adr);
 697 
 698   void emit_operand(MMXRegister reg, Address adr);
 699 
 700   // workaround gcc (3.2.1-7) bug
 701   void emit_operand(Address adr, MMXRegister reg);
 702 
 703 
 704   // Immediate-to-memory forms
 705   void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);


1534   void orl(Register dst, int32_t imm32);
1535   void orl(Register dst, Address src);
1536   void orl(Register dst, Register src);
1537   void orl(Address dst, Register src);
1538 
1539   void orq(Address dst, int32_t imm32);
1540   void orq(Register dst, int32_t imm32);
1541   void orq(Register dst, Address src);
1542   void orq(Register dst, Register src);
1543 
1544   // Pack with unsigned saturation
1545   void packuswb(XMMRegister dst, XMMRegister src);
1546   void packuswb(XMMRegister dst, Address src);
1547   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1548 
 1549   // Permutation of 64-bit words
1550   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1551   void vpermq(XMMRegister dst, XMMRegister src, int imm8);
 1552   void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1553   void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);

1554 
1555   void pause();
1556 
1557   // Undefined Instruction
1558   void ud2();
1559 
1560   // SSE4.2 string instructions
1561   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1562   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1563 
1564   void pcmpeqb(XMMRegister dst, XMMRegister src);
1565   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1566   void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1567   void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1568   void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1569 
1570   void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1571   void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1572 
1573   void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);


1600   void pextrb(Address dst, XMMRegister src, int imm8);
1601   // SSE 2 extract
1602   void pextrw(Register dst, XMMRegister src, int imm8);
1603   void pextrw(Address dst, XMMRegister src, int imm8);
1604 
1605   // SSE 4.1 insert
1606   void pinsrd(XMMRegister dst, Register src, int imm8);
1607   void pinsrq(XMMRegister dst, Register src, int imm8);
1608   void pinsrd(XMMRegister dst, Address src, int imm8);
1609   void pinsrq(XMMRegister dst, Address src, int imm8);
1610   void pinsrb(XMMRegister dst, Address src, int imm8);
1611   // SSE 2 insert
1612   void pinsrw(XMMRegister dst, Register src, int imm8);
1613   void pinsrw(XMMRegister dst, Address src, int imm8);
1614 
1615   // SSE4.1 packed move
1616   void pmovzxbw(XMMRegister dst, XMMRegister src);
1617   void pmovzxbw(XMMRegister dst, Address src);
1618 
 1619   void vpmovzxbw(XMMRegister dst, Address src, int vector_len);

1620   void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1621 
1622   void evpmovwb(Address dst, XMMRegister src, int vector_len);
1623   void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1624 




1625 #ifndef _LP64 // no 32bit push/pop on amd64
1626   void popl(Address dst);
1627 #endif
1628 
1629 #ifdef _LP64
1630   void popq(Address dst);
1631 #endif
1632 
1633   void popcntl(Register dst, Address src);
1634   void popcntl(Register dst, Register src);
1635 
1636   void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1637 
1638 #ifdef _LP64
1639   void popcntq(Register dst, Address src);
1640   void popcntq(Register dst, Register src);
1641 #endif
1642 
1643   // Prefetches (SSE, SSE2, 3DNOW only)
1644 


2004   void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2005   void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2006   void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2007   void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2008   void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2009   void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2010 
2011   // Logical shift right packed integers
2012   void psrlw(XMMRegister dst, int shift);
2013   void psrld(XMMRegister dst, int shift);
2014   void psrlq(XMMRegister dst, int shift);
2015   void psrlw(XMMRegister dst, XMMRegister shift);
2016   void psrld(XMMRegister dst, XMMRegister shift);
2017   void psrlq(XMMRegister dst, XMMRegister shift);
2018   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2019   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2020   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2021   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2022   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2023   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);


2024 
2025   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2026   void psraw(XMMRegister dst, int shift);
2027   void psrad(XMMRegister dst, int shift);
2028   void psraw(XMMRegister dst, XMMRegister shift);
2029   void psrad(XMMRegister dst, XMMRegister shift);
2030   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2031   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2032   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2033   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2034 
2035   // And packed integers
2036   void pand(XMMRegister dst, XMMRegister src);
2037   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2038   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

2039 
2040   // Andn packed integers
2041   void pandn(XMMRegister dst, XMMRegister src);
2042 
2043   // Or packed integers
2044   void por(XMMRegister dst, XMMRegister src);
2045   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2046   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

2047 
2048   // Xor packed integers
2049   void pxor(XMMRegister dst, XMMRegister src);
2050   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2051   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2052   void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2053   void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2054 
2055 
2056   // vinserti forms
2057   void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2058   void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2059   void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2060   void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2061   void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2062 
2063   // vinsertf forms
2064   void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2065   void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2066   void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);


2093   void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2094   void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2095   void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2096   void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2097   void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2098   void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2099   void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2100   void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2101 
2102   // scalar single/double precision replicate
2103   void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2104   void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2105   void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2106   void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2107 
2108   // gpr sourced byte/word/dword/qword replicate
2109   void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2110   void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2111   void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2112   void evpbroadcastq(XMMRegister dst, Register src, int vector_len);


2113 
2114   // Carry-Less Multiplication Quadword
2115   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2116   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2117   void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
 2118   // AVX instruction used to clear the upper 128 bits of YMM registers and
 2119   // to avoid the transition penalty between AVX and SSE states. There is no
 2120   // penalty if legacy SSE instructions are encoded using a VEX prefix because
 2121   // they always clear the upper 128 bits. It should be used before calling
 2122   // runtime code and native libraries.
2123   void vzeroupper();
2124 
 2125   // AVX support for vectorized conditional move (float/double). The following two instructions are used only as a pair.
2126   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2127   void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2128   void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2129   void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2130   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2131 
2132  protected:




 167   };
 168   static ScaleFactor times(int size) {
 169     assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
 170     if (size == 8)  return times_8;
 171     if (size == 4)  return times_4;
 172     if (size == 2)  return times_2;
 173     return times_1;
 174   }
 175   static int scale_size(ScaleFactor scale) {
 176     assert(scale != no_scale, "");
 177     assert(((1 << (int)times_1) == 1 &&
 178             (1 << (int)times_2) == 2 &&
 179             (1 << (int)times_4) == 4 &&
 180             (1 << (int)times_8) == 8), "");
 181     return (1 << (int)scale);
 182   }
 183 
 184  private:
 185   Register         _base;
 186   Register         _index;
 187   XMMRegister      _xmmindex;
 188   ScaleFactor      _scale;
 189   int              _disp;
 190   bool             _isxmmindex;
 191   RelocationHolder _rspec;
 192 
 193   // Easily misused constructors; make them private
 194   // %%% can we make these go away?
 195   NOT_LP64(Address(address loc, RelocationHolder spec);)
 196   Address(int disp, address loc, relocInfo::relocType rtype);
 197   Address(int disp, address loc, RelocationHolder spec);
 198 
 199  public:
 200 
 201   int disp() { return _disp; }
 202   // creation
 203   Address()
 204     : _base(noreg),
 205       _index(noreg),
 206       _xmmindex(xnoreg),
 207       _scale(no_scale),
 208       _disp(0),
 209       _isxmmindex(false) {
 210   }
 211 
 212   // No default displacement, otherwise Register can be implicitly
 213   // converted to 0(Register) which is quite a different animal.
 214 
 215   Address(Register base, int disp)
 216     : _base(base),
 217       _index(noreg),
 218       _xmmindex(xnoreg),
 219       _scale(no_scale),
 220       _disp(disp),
 221       _isxmmindex(false) {
 222   }
 223 
 224   Address(Register base, Register index, ScaleFactor scale, int disp = 0)
 225     : _base (base),
 226       _index(index),
 227       _xmmindex(xnoreg),
 228       _scale(scale),
 229       _disp (disp),
 230       _isxmmindex(false) {
 231     assert(!index->is_valid() == (scale == Address::no_scale),
 232            "inconsistent address");
 233   }
 234 
 235   Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
 236     : _base (base),
 237       _index(index.register_or_noreg()),
 238       _xmmindex(xnoreg),
 239       _scale(scale),
 240       _disp (disp + (index.constant_or_zero() * scale_size(scale))),
 241       _isxmmindex(false) {
 242     if (!index.is_register())  scale = Address::no_scale;
 243     assert(!_index->is_valid() == (scale == Address::no_scale),
 244            "inconsistent address");
 245   }
 246 
 247   Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
 248     : _base (base),
 249       _index(noreg),
 250       _xmmindex(index),
 251       _scale(scale),
 252       _disp(disp),
 253       _isxmmindex(true) {
 254     assert(!index->is_valid() == (scale == Address::no_scale),
 255            "inconsistent address");
 256   }
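
The new XMM-index constructor above supports gather-style addressing; a sketch (register choices assumed):

    Address gather(rax, xmm1, Address::times_4);  // base in rax, per-lane dword indices in xmm1
    // _isxmmindex is set and _index stays noreg; the XMM emit_operand overload below consumes it.
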
 257 
 258   Address plus_disp(int disp) const {
 259     Address a = (*this);
 260     a._disp += disp;
 261     return a;
 262   }
 263   Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
 264     Address a = (*this);
 265     a._disp += disp.constant_or_zero() * scale_size(scale);
 266     if (disp.is_register()) {
 267       assert(!a.index()->is_valid(), "competing indexes");
 268       a._index = disp.as_register();
 269       a._scale = scale;
 270     }
 271     return a;
 272   }
 273   bool is_same_address(Address a) const {
 274     // disregard _rspec
 275     return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
 276   }
 277 
 278   // The following overloads are used in connection with the
 279   // ByteSize type (see sizes.hpp).  They simplify the use of
 280   // ByteSize'd arguments in assembly code. Note that their equivalents
 281   // for the optimized build are the member functions with an int disp
 282   // argument, since ByteSize is mapped to an int type in that case.
 283   //
 284   // Note: DO NOT introduce similar overloaded functions for WordSize
 285   // arguments as in the optimized mode, both ByteSize and WordSize
 286   // are mapped to the same type and thus the compiler cannot make a
 287   // distinction anymore (=> compiler errors).
 288 
 289 #ifdef ASSERT
 290   Address(Register base, ByteSize disp)
 291     : _base(base),
 292       _index(noreg),
 293       _xmmindex(xnoreg),
 294       _scale(no_scale),
 295       _disp(in_bytes(disp)),
 296       _isxmmindex(false) {
 297   }
 298 
 299   Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
 300     : _base(base),
 301       _index(index),
 302       _xmmindex(xnoreg),
 303       _scale(scale),
 304       _disp(in_bytes(disp)),
 305       _isxmmindex(false) {
 306     assert(!index->is_valid() == (scale == Address::no_scale),
 307            "inconsistent address");
 308   }

 309   Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
 310     : _base (base),
 311       _index(index.register_or_noreg()),
 312       _xmmindex(xnoreg),
 313       _scale(scale),
 314       _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))),
 315       _isxmmindex(false) {
 316     if (!index.is_register())  scale = Address::no_scale;
 317     assert(!_index->is_valid() == (scale == Address::no_scale),
 318            "inconsistent address");
 319   }
 320 
 321 #endif // ASSERT
 322 
 323   // accessors
 324   bool        uses(Register reg) const { return _base == reg || _index == reg; }
 325   Register    base()             const { return _base;  }
 326   Register    index()            const { return _index; }
 327   XMMRegister xmmindex()         const { return _xmmindex; }
 328   ScaleFactor scale()            const { return _scale; }
 329   int         disp()             const { return _disp;  }
 330   bool        isxmmindex()       const { return _isxmmindex; }
 331 
 332   // Convert the raw encoding form into the form expected by the constructor for
 333   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 334   // that to noreg for the Address constructor.
 335   static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
 336 
 337   static Address make_array(ArrayAddress);
 338 
 339  private:
 340   bool base_needs_rex() const {
 341     return _base != noreg && _base->encoding() >= 8;
 342   }
 343 
 344   bool index_needs_rex() const {
 345     return _index != noreg && _index->encoding() >= 8;
 346   }
 347 
 348   bool xmmindex_needs_rex() const {
 349     return _xmmindex != xnoreg && _xmmindex->encoding() >= 8;
 350   }
 351 
 352   relocInfo::relocType reloc() const { return _rspec.type(); }
 353 
 354   friend class Assembler;
 355   friend class MacroAssembler;
 356   friend class LIR_Assembler; // base/index/scale/disp
 357 };
 358 
 359 //
 360 // AddressLiteral has been split out from Address because operands of this type
 361 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 362 // the few instructions that need to deal with address literals are unique and the
 363 // MacroAssembler does not have to implement every instruction in the Assembler
 364 // in order to search for address literals that may need special handling depending
 365 // on the instruction and the platform. It is also a small step on the way to merging
 366 // the i486/amd64 directories.
 367 //
 368 class AddressLiteral {
 369   friend class ArrayAddress;
 370   RelocationHolder _rspec;
 371   // Typically, when we use AddressLiterals, we want to use their rval


 698 
 699   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
 700                              VexOpcode opc, InstructionAttr *attributes);
 701 
 702   // Helper functions for groups of instructions
 703   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 704 
 705   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 706   // Force generation of a 4-byte immediate value even if it fits into 8 bits
 707   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
 708   void emit_arith(int op1, int op2, Register dst, Register src);
 709 
 710   bool emit_compressed_disp_byte(int &disp);
 711 
 712   void emit_operand(Register reg,
 713                     Register base, Register index, Address::ScaleFactor scale,
 714                     int disp,
 715                     RelocationHolder const& rspec,
 716                     int rip_relative_correction = 0);
 717 
 718   void emit_operand(XMMRegister reg, Register base, XMMRegister index,
 719                     Address::ScaleFactor scale,
 720                     int disp, RelocationHolder const& rspec);
 721 
 722   void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 723 
 724   // operands that only take the original 32bit registers
 725   void emit_operand32(Register reg, Address adr);
 726 
 727   void emit_operand(XMMRegister reg,
 728                     Register base, Register index, Address::ScaleFactor scale,
 729                     int disp,
 730                     RelocationHolder const& rspec);
 731 
 732   void emit_operand(XMMRegister reg, Address adr);
 733 
 734   void emit_operand(MMXRegister reg, Address adr);
 735 
 736   // workaround gcc (3.2.1-7) bug
 737   void emit_operand(Address adr, MMXRegister reg);
 738 
 739 
 740   // Immediate-to-memory forms
 741   void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);


1570   void orl(Register dst, int32_t imm32);
1571   void orl(Register dst, Address src);
1572   void orl(Register dst, Register src);
1573   void orl(Address dst, Register src);
1574 
1575   void orq(Address dst, int32_t imm32);
1576   void orq(Register dst, int32_t imm32);
1577   void orq(Register dst, Address src);
1578   void orq(Register dst, Register src);
1579 
1580   // Pack with unsigned saturation
1581   void packuswb(XMMRegister dst, XMMRegister src);
1582   void packuswb(XMMRegister dst, Address src);
1583   void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1584 
 1585   // Permutation of 64-bit words
1586   void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1587   void vpermq(XMMRegister dst, XMMRegister src, int imm8);
 1588   void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1589   void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1590   void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1591 
1592   void pause();
1593 
1594   // Undefined Instruction
1595   void ud2();
1596 
1597   // SSE4.2 string instructions
1598   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1599   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1600 
1601   void pcmpeqb(XMMRegister dst, XMMRegister src);
1602   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1603   void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1604   void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1605   void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1606 
1607   void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1608   void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1609 
1610   void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);


1637   void pextrb(Address dst, XMMRegister src, int imm8);
1638   // SSE 2 extract
1639   void pextrw(Register dst, XMMRegister src, int imm8);
1640   void pextrw(Address dst, XMMRegister src, int imm8);
1641 
1642   // SSE 4.1 insert
1643   void pinsrd(XMMRegister dst, Register src, int imm8);
1644   void pinsrq(XMMRegister dst, Register src, int imm8);
1645   void pinsrd(XMMRegister dst, Address src, int imm8);
1646   void pinsrq(XMMRegister dst, Address src, int imm8);
1647   void pinsrb(XMMRegister dst, Address src, int imm8);
1648   // SSE 2 insert
1649   void pinsrw(XMMRegister dst, Register src, int imm8);
1650   void pinsrw(XMMRegister dst, Address src, int imm8);
1651 
1652   // SSE4.1 packed move
1653   void pmovzxbw(XMMRegister dst, XMMRegister src);
1654   void pmovzxbw(XMMRegister dst, Address src);
1655 
 1656   void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1657   void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1658   void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1659 
1660   void evpmovwb(Address dst, XMMRegister src, int vector_len);
1661   void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1662 
1663   void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1664 
1665   void evpmovdb(Address dst, XMMRegister src, int vector_len);
1666 
1667 #ifndef _LP64 // no 32bit push/pop on amd64
1668   void popl(Address dst);
1669 #endif
1670 
1671 #ifdef _LP64
1672   void popq(Address dst);
1673 #endif
1674 
1675   void popcntl(Register dst, Address src);
1676   void popcntl(Register dst, Register src);
1677 
1678   void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1679 
1680 #ifdef _LP64
1681   void popcntq(Register dst, Address src);
1682   void popcntq(Register dst, Register src);
1683 #endif
1684 
1685   // Prefetches (SSE, SSE2, 3DNOW only)
1686 


2046   void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2047   void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2048   void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2049   void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2050   void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2051   void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2052 
2053   // Logical shift right packed integers
2054   void psrlw(XMMRegister dst, int shift);
2055   void psrld(XMMRegister dst, int shift);
2056   void psrlq(XMMRegister dst, int shift);
2057   void psrlw(XMMRegister dst, XMMRegister shift);
2058   void psrld(XMMRegister dst, XMMRegister shift);
2059   void psrlq(XMMRegister dst, XMMRegister shift);
2060   void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2061   void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2062   void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2063   void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2064   void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2065   void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2066   void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2067   void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2068 
2069   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2070   void psraw(XMMRegister dst, int shift);
2071   void psrad(XMMRegister dst, int shift);
2072   void psraw(XMMRegister dst, XMMRegister shift);
2073   void psrad(XMMRegister dst, XMMRegister shift);
2074   void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2075   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2076   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2077   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2078 
2079   // And packed integers
2080   void pand(XMMRegister dst, XMMRegister src);
2081   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2082   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2083   void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2084 
2085   // Andn packed integers
2086   void pandn(XMMRegister dst, XMMRegister src);
2087 
2088   // Or packed integers
2089   void por(XMMRegister dst, XMMRegister src);
2090   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2091   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2092   void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2093 
2094   // Xor packed integers
2095   void pxor(XMMRegister dst, XMMRegister src);
2096   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2097   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2098   void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2099   void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2100 
2101 
2102   // vinserti forms
2103   void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2104   void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2105   void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2106   void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2107   void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2108 
2109   // vinsertf forms
2110   void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2111   void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2112   void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);


2139   void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2140   void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2141   void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2142   void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2143   void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2144   void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2145   void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2146   void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2147 
2148   // scalar single/double precision replicate
2149   void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2150   void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2151   void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2152   void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2153 
2154   // gpr sourced byte/word/dword/qword replicate
2155   void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2156   void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2157   void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2158   void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2159 
2160   void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
2161 
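
A hedged sketch combining the new gather instruction with the XMM-index Address form (mask, registers, and vector length assumed):

    // Gather dwords at rax + xmm1[i]*4 into the xmm0 lanes enabled by mask k1.
    __ evpgatherdd(xmm0, k1, Address(rax, xmm1, Address::times_4), Assembler::AVX_512bit);
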
2162   // Carry-Less Multiplication Quadword
2163   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2164   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2165   void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
 2166   // AVX instruction used to clear the upper 128 bits of YMM registers and
 2167   // to avoid the transition penalty between AVX and SSE states. There is no
 2168   // penalty if legacy SSE instructions are encoded using a VEX prefix because
 2169   // they always clear the upper 128 bits. It should be used before calling
 2170   // runtime code and native libraries.
2171   void vzeroupper();
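
A usage sketch matching the comment above (the runtime entry name is hypothetical):

    __ vzeroupper();  // leave AVX state before calling out
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, some_runtime_entry)));
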
2172 
 2173   // AVX support for vectorized conditional move (float/double). The following two instructions are used only as a pair.
2174   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2175   void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2176   void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2177   void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2178   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
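
A sketch of the coupled compare/blend pattern described above (predicate encoding and registers assumed):

    __ cmppd(xmm0, xmm1, xmm2, 0 /* EQ; predicate encoding assumed */, Assembler::AVX_256bit);  // mask in xmm0
    __ blendvpd(xmm3, xmm0, xmm1, xmm2, Assembler::AVX_256bit);                                 // select lanes by mask
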
2179 
2180  protected:

