167 };
168 static ScaleFactor times(int size) {
169 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
170 if (size == 8) return times_8;
171 if (size == 4) return times_4;
172 if (size == 2) return times_2;
173 return times_1;
174 }
// Inverse of times(): number of bytes covered by one index step at 'scale'.
175 static int scale_size(ScaleFactor scale) {
176 assert(scale != no_scale, "");
// Sanity check: the ScaleFactor enum values are the log2 of their byte sizes,
// so the shift below is a valid decoding.
177 assert(((1 << (int)times_1) == 1 &&
178 (1 << (int)times_2) == 2 &&
179 (1 << (int)times_4) == 4 &&
180 (1 << (int)times_8) == 8), "");
181 return (1 << (int)scale);
182 }
183
184 private:
// An Address denotes base + index*scale + disp, optionally carrying
// relocation information for the displacement.
185 Register _base;
186 Register _index;
187 ScaleFactor _scale;
188 int _disp;
189 RelocationHolder _rspec;
190
191 // Easily misused constructors make them private
192 // %%% can we make these go away?
193 NOT_LP64(Address(address loc, RelocationHolder spec);)
194 Address(int disp, address loc, relocInfo::relocType rtype);
195 Address(int disp, address loc, RelocationHolder spec);
196
197 public:
198
// Non-const accessor; a const overload appears with the other accessors below.
199 int disp() { return _disp; }
200 // creation
// Empty/invalid address (no base, no index, zero displacement).
201 Address()
202 : _base(noreg),
203 _index(noreg),
204 _scale(no_scale),
205 _disp(0) {
206 }
207
208 // No default displacement otherwise Register can be implicitly
209 // converted to 0(Register) which is quite a different animal.
210
// base + disp, no index.
211 Address(Register base, int disp)
212 : _base(base),
213 _index(noreg),
214 _scale(no_scale),
215 _disp(disp) {
216 }
217
// base + index*scale + disp; a valid index must come with a real scale
// and vice versa.
218 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
219 : _base (base),
220 _index(index),
221 _scale(scale),
222 _disp (disp) {
223 assert(!index->is_valid() == (scale == Address::no_scale),
224 "inconsistent address");
225 }
226
// Like the above, but a constant index is folded (scaled) into the
// displacement. NOTE(review): when the index is a constant, only the local
// 'scale' is reset for the assert; the member _scale keeps the caller's
// value while _index stays noreg — presumably the encoder ignores _scale
// when there is no index register; confirm before relying on _scale here.
227 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
228 : _base (base),
229 _index(index.register_or_noreg()),
230 _scale(scale),
231 _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
232 if (!index.is_register()) scale = Address::no_scale;
233 assert(!_index->is_valid() == (scale == Address::no_scale),
234 "inconsistent address");
235 }
236
237 Address plus_disp(int disp) const {
238 Address a = (*this);
239 a._disp += disp;
240 return a;
241 }
// Return a copy displaced by 'disp' (scaled): a constant disp is folded into
// _disp; a register disp becomes the index (allowed only if there is no
// index yet — see the "competing indexes" assert).
242 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
243 Address a = (*this);
244 a._disp += disp.constant_or_zero() * scale_size(scale);
245 if (disp.is_register()) {
246 assert(!a.index()->is_valid(), "competing indexes");
247 a._index = disp.as_register();
248 a._scale = scale;
249 }
250 return a;
251 }
252 bool is_same_address(Address a) const {
253 // disregard _rspec
254 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
255 }
256
257 // The following two overloads are used in connection with the
258 // ByteSize type (see sizes.hpp). They simplify the use of
259 // ByteSize'd arguments in assembly code. Note that their equivalent
260 // for the optimized build are the member functions with int disp
261 // argument since ByteSize is mapped to an int type in that case.
262 //
263 // Note: DO NOT introduce similar overloaded functions for WordSize
264 // arguments as in the optimized mode, both ByteSize and WordSize
265 // are mapped to the same type and thus the compiler cannot make a
266 // distinction anymore (=> compiler errors).
267
268 #ifdef ASSERT
// These mirror the int-disp constructors above, converting ByteSize via
// in_bytes().
269 Address(Register base, ByteSize disp)
270 : _base(base),
271 _index(noreg),
272 _scale(no_scale),
273 _disp(in_bytes(disp)) {
274 }
275
276 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
277 : _base(base),
278 _index(index),
279 _scale(scale),
280 _disp(in_bytes(disp)) {
281 assert(!index->is_valid() == (scale == Address::no_scale),
282 "inconsistent address");
283 }
284
285 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
286 : _base (base),
287 _index(index.register_or_noreg()),
288 _scale(scale),
289 _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
290 if (!index.is_register()) scale = Address::no_scale;
291 assert(!_index->is_valid() == (scale == Address::no_scale),
292 "inconsistent address");
293 }
294
295 #endif // ASSERT
296
297 // accessors
// True if 'reg' appears as the base or the index of this address.
298 bool uses(Register reg) const { return _base == reg || _index == reg; }
299 Register base() const { return _base; }
300 Register index() const { return _index; }
301 ScaleFactor scale() const { return _scale; }
302 int disp() const { return _disp; }
303
304 // Convert the raw encoding form into the form expected by the constructor for
305 // Address. An index of 4 (rsp) corresponds to having no index, so convert
306 // that to noreg for the Address constructor.
307 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
308
309 static Address make_array(ArrayAddress);
310
310
311 private:
// Registers with hardware encoding >= 8 (r8..r15) need a REX prefix bit in
// the instruction encoding.
312 bool base_needs_rex() const {
313 return _base != noreg && _base->encoding() >= 8;
314 }
315
316 bool index_needs_rex() const {
317 return _index != noreg &&_index->encoding() >= 8;
318 }
319
320 relocInfo::relocType reloc() const { return _rspec.type(); }
321
322 friend class Assembler;
323 friend class MacroAssembler;
324 friend class LIR_Assembler; // base/index/scale/disp
325 };
326
327 //
328 // AddressLiteral has been split out from Address because operands of this type
329 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
330 // the few instructions that need to deal with address literals are unique and the
331 // MacroAssembler does not have to implement every instruction in the Assembler
332 // in order to search for address literals that may need special handling depending
333 // on the instruction and the platform. As a small step on the way to merging i486/amd64
334 // directories.
335 //
336 class AddressLiteral {
337 friend class ArrayAddress;
338 RelocationHolder _rspec;
339 // Typically, when we use AddressLiterals we want to use their rval
666
667 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
668 VexOpcode opc, InstructionAttr *attributes);
669
670 // Helper functions for groups of instructions
671 void emit_arith_b(int op1, int op2, Register dst, int imm8);
672
673 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
674 // Force generation of a 4 byte immediate value even if it fits into 8bit
675 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
676 void emit_arith(int op1, int op2, Register dst, Register src);
677
678 bool emit_compressed_disp_byte(int &disp);
679
680 void emit_operand(Register reg,
681 Register base, Register index, Address::ScaleFactor scale,
682 int disp,
683 RelocationHolder const& rspec,
684 int rip_relative_correction = 0);
685
686 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
687
688 // operands that only take the original 32bit registers
689 void emit_operand32(Register reg, Address adr);
690
691 void emit_operand(XMMRegister reg,
692 Register base, Register index, Address::ScaleFactor scale,
693 int disp,
694 RelocationHolder const& rspec);
695
696 void emit_operand(XMMRegister reg, Address adr);
697
698 void emit_operand(MMXRegister reg, Address adr);
699
700 // workaround gcc (3.2.1-7) bug
701 void emit_operand(Address adr, MMXRegister reg);
702
703
704 // Immediate-to-memory forms
705 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
1534 void orl(Register dst, int32_t imm32);
1535 void orl(Register dst, Address src);
1536 void orl(Register dst, Register src);
1537 void orl(Address dst, Register src);
1538
1539 void orq(Address dst, int32_t imm32);
1540 void orq(Register dst, int32_t imm32);
1541 void orq(Register dst, Address src);
1542 void orq(Register dst, Register src);
1543
1544 // Pack with unsigned saturation
1545 void packuswb(XMMRegister dst, XMMRegister src);
1546 void packuswb(XMMRegister dst, Address src);
1547 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1548
1549 // Permutation of 64bit words
1550 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1551 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1552 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1553 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1554
1555 void pause();
1556
1557 // Undefined Instruction
1558 void ud2();
1559
1560 // SSE4.2 string instructions
1561 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1562 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1563
1564 void pcmpeqb(XMMRegister dst, XMMRegister src);
1565 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1566 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1567 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1568 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1569
1570 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1571 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1572
1573 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1600 void pextrb(Address dst, XMMRegister src, int imm8);
1601 // SSE 2 extract
1602 void pextrw(Register dst, XMMRegister src, int imm8);
1603 void pextrw(Address dst, XMMRegister src, int imm8);
1604
1605 // SSE 4.1 insert
1606 void pinsrd(XMMRegister dst, Register src, int imm8);
1607 void pinsrq(XMMRegister dst, Register src, int imm8);
1608 void pinsrd(XMMRegister dst, Address src, int imm8);
1609 void pinsrq(XMMRegister dst, Address src, int imm8);
1610 void pinsrb(XMMRegister dst, Address src, int imm8);
1611 // SSE 2 insert
1612 void pinsrw(XMMRegister dst, Register src, int imm8);
1613 void pinsrw(XMMRegister dst, Address src, int imm8);
1614
1615 // SSE4.1 packed move
1616 void pmovzxbw(XMMRegister dst, XMMRegister src);
1617 void pmovzxbw(XMMRegister dst, Address src);
1618
1619 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1620 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1621
1622 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1623 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1624
1625 #ifndef _LP64 // no 32bit push/pop on amd64
1626 void popl(Address dst);
1627 #endif
1628
1629 #ifdef _LP64
1630 void popq(Address dst);
1631 #endif
1632
1633 void popcntl(Register dst, Address src);
1634 void popcntl(Register dst, Register src);
1635
1636 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1637
1638 #ifdef _LP64
1639 void popcntq(Register dst, Address src);
1640 void popcntq(Register dst, Register src);
1641 #endif
1642
1643 // Prefetches (SSE, SSE2, 3DNOW only)
1644
2004 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2005 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2006 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2007 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2008 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2009 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2010
2011 // Logical shift right packed integers
2012 void psrlw(XMMRegister dst, int shift);
2013 void psrld(XMMRegister dst, int shift);
2014 void psrlq(XMMRegister dst, int shift);
2015 void psrlw(XMMRegister dst, XMMRegister shift);
2016 void psrld(XMMRegister dst, XMMRegister shift);
2017 void psrlq(XMMRegister dst, XMMRegister shift);
2018 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2019 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2020 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2021 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2022 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2023 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2024
2025 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2026 void psraw(XMMRegister dst, int shift);
2027 void psrad(XMMRegister dst, int shift);
2028 void psraw(XMMRegister dst, XMMRegister shift);
2029 void psrad(XMMRegister dst, XMMRegister shift);
2030 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2031 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2032 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2033 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2034
2035 // And packed integers
2036 void pand(XMMRegister dst, XMMRegister src);
2037 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2038 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2039
2040 // Andn packed integers
2041 void pandn(XMMRegister dst, XMMRegister src);
2042
2043 // Or packed integers
2044 void por(XMMRegister dst, XMMRegister src);
2045 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2046 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2047
2048 // Xor packed integers
2049 void pxor(XMMRegister dst, XMMRegister src);
2050 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2051 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2052 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2053 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2054
2055
2056 // vinserti forms
2057 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2058 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2059 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2060 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2061 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2062
2063 // vinsertf forms
2064 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2065 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2066 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2093 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2094 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2095 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2096 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2097 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2098 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2099 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2100 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2101
2102 // scalar single/double precision replicate
2103 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2104 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2105 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2106 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2107
2108 // gpr sourced byte/word/dword/qword replicate
2109 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2110 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2111 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2112 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2113
2114 // Carry-Less Multiplication Quadword
2115 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2116 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2117 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2118 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2119 // to avoid transaction penalty between AVX and SSE states. There is no
2120 // penalty if legacy SSE instructions are encoded using VEX prefix because
2121 // they always clear upper 128 bits. It should be used before calling
2122 // runtime code and native libraries.
2123 void vzeroupper();
2124
2125 // AVX support for vectorized conditional move (float/double). The following two instructions are only used coupled.
2126 void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2127 void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2128 void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2129 void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2130 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2131
2132 protected:
|
167 };
168 static ScaleFactor times(int size) {
169 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
170 if (size == 8) return times_8;
171 if (size == 4) return times_4;
172 if (size == 2) return times_2;
173 return times_1;
174 }
// Inverse of times(): number of bytes covered by one index step at 'scale'.
175 static int scale_size(ScaleFactor scale) {
176 assert(scale != no_scale, "");
// Sanity check: the ScaleFactor enum values are the log2 of their byte sizes,
// so the shift below is a valid decoding.
177 assert(((1 << (int)times_1) == 1 &&
178 (1 << (int)times_2) == 2 &&
179 (1 << (int)times_4) == 4 &&
180 (1 << (int)times_8) == 8), "");
181 return (1 << (int)scale);
182 }
183
184 private:
// An Address denotes base + index*scale + disp, optionally carrying
// relocation information for the displacement. The index is either a GPR
// (_index) or — when _isxmmindex is set — a vector register (_xmmindex),
// used for vector-indexed (gather/scatter-style) addressing; see the
// XMMRegister constructor and the XMM-index emit_operand overload.
185 Register _base;
186 Register _index;
187 XMMRegister _xmmindex;
188 ScaleFactor _scale;
189 int _disp;
190 bool _isxmmindex;
191 RelocationHolder _rspec;
192
193 // Easily misused constructors make them private
194 // %%% can we make these go away?
195 NOT_LP64(Address(address loc, RelocationHolder spec);)
196 Address(int disp, address loc, relocInfo::relocType rtype);
197 Address(int disp, address loc, RelocationHolder spec);
198
199 public:
200
// Non-const accessor; a const overload appears with the other accessors below.
201 int disp() { return _disp; }
202 // creation
// Empty/invalid address (no base, no index, zero displacement).
203 Address()
204 : _base(noreg),
205 _index(noreg),
206 _xmmindex(xnoreg),
207 _scale(no_scale),
208 _disp(0),
209 _isxmmindex(false){
210 }
211
212 // No default displacement otherwise Register can be implicitly
213 // converted to 0(Register) which is quite a different animal.
214
// base + disp, no index.
215 Address(Register base, int disp)
216 : _base(base),
217 _index(noreg),
218 _xmmindex(xnoreg),
219 _scale(no_scale),
220 _disp(disp),
221 _isxmmindex(false){
222 }
223
// base + index*scale + disp; a valid index must come with a real scale
// and vice versa.
224 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
225 : _base (base),
226 _index(index),
227 _xmmindex(xnoreg),
228 _scale(scale),
229 _disp (disp),
230 _isxmmindex(false) {
231 assert(!index->is_valid() == (scale == Address::no_scale),
232 "inconsistent address");
233 }
234
// Like the above, but a constant index is folded (scaled) into the
// displacement. NOTE(review): when the index is a constant, only the local
// 'scale' is reset for the assert; the member _scale keeps the caller's
// value while _index stays noreg — presumably the encoder ignores _scale
// when there is no index register; confirm before relying on _scale here.
235 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
236 : _base (base),
237 _index(index.register_or_noreg()),
238 _xmmindex(xnoreg),
239 _scale(scale),
240 _disp (disp + (index.constant_or_zero() * scale_size(scale))),
241 _isxmmindex(false){
242 if (!index.is_register()) scale = Address::no_scale;
243 assert(!_index->is_valid() == (scale == Address::no_scale),
244 "inconsistent address");
245 }
246
// Vector-indexed address: the index is an XMM/YMM/ZMM register; _index is
// left as noreg and _isxmmindex marks which index field is live.
247 Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
248 : _base (base),
249 _index(noreg),
250 _xmmindex(index),
251 _scale(scale),
252 _disp(disp),
253 _isxmmindex(true) {
254 assert(!index->is_valid() == (scale == Address::no_scale),
255 "inconsistent address");
256 }
257
258 Address plus_disp(int disp) const {
259 Address a = (*this);
260 a._disp += disp;
261 return a;
262 }
// Return a copy displaced by 'disp' (scaled): a constant disp is folded into
// _disp; a register disp becomes the index (allowed only if there is no
// index yet). NOTE(review): the "competing indexes" assert checks only the
// GPR index; an existing _xmmindex would not be detected — confirm callers
// never combine this with vector-indexed addresses.
263 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
264 Address a = (*this);
265 a._disp += disp.constant_or_zero() * scale_size(scale);
266 if (disp.is_register()) {
267 assert(!a.index()->is_valid(), "competing indexes");
268 a._index = disp.as_register();
269 a._scale = scale;
270 }
271 return a;
272 }
273 bool is_same_address(Address a) const {
274 // disregard _rspec
275 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
276 }
277
278 // The following two overloads are used in connection with the
279 // ByteSize type (see sizes.hpp). They simplify the use of
280 // ByteSize'd arguments in assembly code. Note that their equivalent
281 // for the optimized build are the member functions with int disp
282 // argument since ByteSize is mapped to an int type in that case.
283 //
284 // Note: DO NOT introduce similar overloaded functions for WordSize
285 // arguments as in the optimized mode, both ByteSize and WordSize
286 // are mapped to the same type and thus the compiler cannot make a
287 // distinction anymore (=> compiler errors).
288
289 #ifdef ASSERT
// These mirror the int-disp constructors above, converting ByteSize via
// in_bytes().
290 Address(Register base, ByteSize disp)
291 : _base(base),
292 _index(noreg),
293 _xmmindex(xnoreg),
294 _scale(no_scale),
295 _disp(in_bytes(disp)),
296 _isxmmindex(false){
297 }
298
299 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
300 : _base(base),
301 _index(index),
302 _xmmindex(xnoreg),
303 _scale(scale),
304 _disp(in_bytes(disp)),
305 _isxmmindex(false){
306 assert(!index->is_valid() == (scale == Address::no_scale),
307 "inconsistent address");
308 }
309 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
310 : _base (base),
311 _index(index.register_or_noreg()),
312 _xmmindex(xnoreg),
313 _scale(scale),
314 _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))),
315 _isxmmindex(false) {
316 if (!index.is_register()) scale = Address::no_scale;
317 assert(!_index->is_valid() == (scale == Address::no_scale),
318 "inconsistent address");
319 }
320
321 #endif // ASSERT
322
323 // accessors
// True if 'reg' appears as the base or the GPR index of this address.
// NOTE(review): the XMM index is not (and cannot be) checked here since
// 'reg' is a GPR.
324 bool uses(Register reg) const { return _base == reg || _index == reg; }
325 Register base() const { return _base; }
326 Register index() const { return _index; }
327 XMMRegister xmmindex() const { return _xmmindex; }
328 ScaleFactor scale() const { return _scale; }
329 int disp() const { return _disp; }
330 bool isxmmindex() const { return _isxmmindex; }
331
332 // Convert the raw encoding form into the form expected by the constructor for
333 // Address. An index of 4 (rsp) corresponds to having no index, so convert
334 // that to noreg for the Address constructor.
335 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
336
337 static Address make_array(ArrayAddress);
338
339 private:
// Registers with hardware encoding >= 8 (r8..r15, xmm8 and up) need a REX
// prefix bit (or its VEX/EVEX equivalent) in the instruction encoding.
340 bool base_needs_rex() const {
341 return _base != noreg && _base->encoding() >= 8;
342 }
343
344 bool index_needs_rex() const {
345 return _index != noreg &&_index->encoding() >= 8;
346 }
347
348 bool xmmindex_needs_rex() const {
349 return _xmmindex != xnoreg && _xmmindex->encoding() >= 8;
350 }
351
352 relocInfo::relocType reloc() const { return _rspec.type(); }
353
354 friend class Assembler;
355 friend class MacroAssembler;
356 friend class LIR_Assembler; // base/index/scale/disp
357 };
358
359 //
360 // AddressLiteral has been split out from Address because operands of this type
361 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
362 // the few instructions that need to deal with address literals are unique and the
363 // MacroAssembler does not have to implement every instruction in the Assembler
364 // in order to search for address literals that may need special handling depending
365 // on the instruction and the platform. As a small step on the way to merging i486/amd64
366 // directories.
367 //
368 class AddressLiteral {
369 friend class ArrayAddress;
370 RelocationHolder _rspec;
371 // Typically, when we use AddressLiterals we want to use their rval
698
699 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
700 VexOpcode opc, InstructionAttr *attributes);
701
702 // Helper functions for groups of instructions
703 void emit_arith_b(int op1, int op2, Register dst, int imm8);
704
705 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
706 // Force generation of a 4 byte immediate value even if it fits into 8bit
707 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
708 void emit_arith(int op1, int op2, Register dst, Register src);
709
710 bool emit_compressed_disp_byte(int &disp);
711
// Emit the ModR/M (and, if needed, SIB) bytes plus displacement for a
// memory operand addressed by base + index*scale + disp.
712 void emit_operand(Register reg,
713 Register base, Register index, Address::ScaleFactor scale,
714 int disp,
715 RelocationHolder const& rspec,
716 int rip_relative_correction = 0);
717
// Overload for a vector (XMM) index — used by vector-indexed addressing.
718 void emit_operand(XMMRegister reg, Register base, XMMRegister index, Address::ScaleFactor scale,
719 int disp, RelocationHolder const& rspec);
720
721 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
722
723 // operands that only take the original 32bit registers
724 void emit_operand32(Register reg, Address adr);
725
726 void emit_operand(XMMRegister reg,
727 Register base, Register index, Address::ScaleFactor scale,
728 int disp,
729 RelocationHolder const& rspec);
730
731 void emit_operand(XMMRegister reg, Address adr);
732
733 void emit_operand(MMXRegister reg, Address adr);
734
735 // workaround gcc (3.2.1-7) bug
736 void emit_operand(Address adr, MMXRegister reg);
737
738
739 // Immediate-to-memory forms
740 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
1569 void orl(Register dst, int32_t imm32);
1570 void orl(Register dst, Address src);
1571 void orl(Register dst, Register src);
1572 void orl(Address dst, Register src);
1573
1574 void orq(Address dst, int32_t imm32);
1575 void orq(Register dst, int32_t imm32);
1576 void orq(Register dst, Address src);
1577 void orq(Register dst, Register src);
1578
1579 // Pack with unsigned saturation
1580 void packuswb(XMMRegister dst, XMMRegister src);
1581 void packuswb(XMMRegister dst, Address src);
1582 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1583
1584 // Permutation of 64bit words
1585 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1586 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1587 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1588 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1589 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1590
1591 void pause();
1592
1593 // Undefined Instruction
1594 void ud2();
1595
1596 // SSE4.2 string instructions
1597 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1598 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1599
1600 void pcmpeqb(XMMRegister dst, XMMRegister src);
1601 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1602 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1603 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1604 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1605
1606 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1607 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1608
1609 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1636 void pextrb(Address dst, XMMRegister src, int imm8);
1637 // SSE 2 extract
1638 void pextrw(Register dst, XMMRegister src, int imm8);
1639 void pextrw(Address dst, XMMRegister src, int imm8);
1640
1641 // SSE 4.1 insert
1642 void pinsrd(XMMRegister dst, Register src, int imm8);
1643 void pinsrq(XMMRegister dst, Register src, int imm8);
1644 void pinsrd(XMMRegister dst, Address src, int imm8);
1645 void pinsrq(XMMRegister dst, Address src, int imm8);
1646 void pinsrb(XMMRegister dst, Address src, int imm8);
1647 // SSE 2 insert
1648 void pinsrw(XMMRegister dst, Register src, int imm8);
1649 void pinsrw(XMMRegister dst, Address src, int imm8);
1650
1651 // SSE4.1 packed move
1652 void pmovzxbw(XMMRegister dst, XMMRegister src);
1653 void pmovzxbw(XMMRegister dst, Address src);
1654
1655 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1656 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1657 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1658
1659 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1660 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1661
1662 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1663
1664 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1665
1666 #ifndef _LP64 // no 32bit push/pop on amd64
1667 void popl(Address dst);
1668 #endif
1669
1670 #ifdef _LP64
1671 void popq(Address dst);
1672 #endif
1673
1674 void popcntl(Register dst, Address src);
1675 void popcntl(Register dst, Register src);
1676
1677 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1678
1679 #ifdef _LP64
1680 void popcntq(Register dst, Address src);
1681 void popcntq(Register dst, Register src);
1682 #endif
1683
1684 // Prefetches (SSE, SSE2, 3DNOW only)
1685
2045 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2046 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2047 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2048 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2049 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2050 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2051
2052 // Logical shift right packed integers
2053 void psrlw(XMMRegister dst, int shift);
2054 void psrld(XMMRegister dst, int shift);
2055 void psrlq(XMMRegister dst, int shift);
2056 void psrlw(XMMRegister dst, XMMRegister shift);
2057 void psrld(XMMRegister dst, XMMRegister shift);
2058 void psrlq(XMMRegister dst, XMMRegister shift);
2059 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2060 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2061 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2062 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2063 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2064 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2065 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2066 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2067
2068 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2069 void psraw(XMMRegister dst, int shift);
2070 void psrad(XMMRegister dst, int shift);
2071 void psraw(XMMRegister dst, XMMRegister shift);
2072 void psrad(XMMRegister dst, XMMRegister shift);
2073 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2074 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2075 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2076 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2077
2078 // And packed integers
2079 void pand(XMMRegister dst, XMMRegister src);
2080 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2081 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// q-suffixed form operates on quadword elements (EVEX encoded).
2082 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2083
2084 // Andn packed integers (dst = ~dst & src)
2085 void pandn(XMMRegister dst, XMMRegister src);
2086
2087 // Or packed integers
2088 void por(XMMRegister dst, XMMRegister src);
2089 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2090 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2091 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2092
2093 // Xor packed integers
2094 void pxor(XMMRegister dst, XMMRegister src);
2095 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2096 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// AVX-512 quadword xor, register and memory source forms.
2097 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2098 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2099
2100
2101 // vinserti forms
// Insert an integer sub-vector into dst at the lane selected by imm8
// (128-bit lanes for the 128/32x4 forms, 256-bit for 64x4).
2102 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2103 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2104 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2105 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2106 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2107
2108 // vinsertf forms
// Floating-point counterparts of the vinserti group above; imm8
// selects the destination lane.
2109 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2110 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2111 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
// xmm/mem sourced byte/word/dword/qword replicate
2138 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2139 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2140 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2141 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2142 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2143 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2144 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2145 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2146
2147 // scalar single/double precision replicate
2148 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2149 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2150 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2151 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2152
2153 // gpr sourced byte/word/dword/qword replicate
2154 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2155 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2156 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2157 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2158
// Gather packed dword elements from src under the opmask k1 (AVX-512).
2159 void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
2160
2161 // Carry-Less Multiplication Quadword
// mask (imm8) selects which quadword halves of each operand are
// multiplied.
2162 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2163 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2164 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2165 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2166 // to avoid transaction penalty between AVX and SSE states. There is no
2167 // penalty if legacy SSE instructions are encoded using VEX prefix because
2168 // they always clear upper 128 bits. It should be used before calling
2169 // runtime code and native libraries.
2170 void vzeroupper();
2171
2171
2172 // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
2173 void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2174 void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2175 void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2176 void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2177 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2178
2179 protected:
|