167 };
168 static ScaleFactor times(int size) {
169 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
170 if (size == 8) return times_8;
171 if (size == 4) return times_4;
172 if (size == 2) return times_2;
173 return times_1;
174 }
  // Inverse of times(): map a scale factor back to its size in bytes.
  // Precondition: scale != no_scale.
  static int scale_size(ScaleFactor scale) {
    assert(scale != no_scale, "");
    // Sanity check that the enum values are the log2 of the scale size,
    // so a plain left shift performs the conversion.
    assert(((1 << (int)times_1) == 1 &&
            (1 << (int)times_2) == 2 &&
            (1 << (int)times_4) == 4 &&
            (1 << (int)times_8) == 8), "");
    return (1 << (int)scale);
  }
183
184 private:
185 Register _base;
186 Register _index;
187 ScaleFactor _scale;
188 int _disp;
189 RelocationHolder _rspec;
190
191 // Easily misused constructors make them private
192 // %%% can we make these go away?
193 NOT_LP64(Address(address loc, RelocationHolder spec);)
194 Address(int disp, address loc, relocInfo::relocType rtype);
195 Address(int disp, address loc, RelocationHolder spec);
196
197 public:
198
199 int disp() { return _disp; }
200 // creation
  // Default: an empty/invalid address — no base, no index, zero displacement.
  Address()
    : _base(noreg),
      _index(noreg),
      _scale(no_scale),
      _disp(0) {
  }
207
208 // No default displacement otherwise Register can be implicitly
209 // converted to 0(Register) which is quite a different animal.
210
  // Base-plus-displacement addressing: disp(base).
  Address(Register base, int disp)
    : _base(base),
      _index(noreg),
      _scale(no_scale),
      _disp(disp) {
  }
217
  // Full base + index*scale + disp form. A valid index register must come
  // with a real scale, and an omitted index with no_scale (checked below).
  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
    : _base (base),
      _index(index),
      _scale(scale),
      _disp (disp) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }
226
  // Like the Register-index form, but the index may be a compile-time
  // constant: a constant index is folded into the displacement
  // (constant * scale size) and the register index slot is left empty.
  Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
    : _base (base),
      _index(index.register_or_noreg()),
      _scale(scale),
      _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
    // The local 'scale' is downgraded only so the assert below holds in the
    // constant-index case; note that _scale itself keeps the caller's value.
    if (!index.is_register()) scale = Address::no_scale;
    assert(!_index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }
236
237 Address plus_disp(int disp) const {
238 Address a = (*this);
239 a._disp += disp;
240 return a;
241 }
  // Return a copy of this address displaced by a register-or-constant.
  // A constant is folded into the displacement (scaled); a register becomes
  // the index — the address must not already have a valid index.
  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
    Address a = (*this);
    a._disp += disp.constant_or_zero() * scale_size(scale);
    if (disp.is_register()) {
      assert(!a.index()->is_valid(), "competing indexes");
      a._index = disp.as_register();
      a._scale = scale;
    }
    return a;
  }
  // Structural equality on base/index/scale/disp; relocation info is ignored.
  bool is_same_address(Address a) const {
    // disregard _rspec
    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
  }
256
257 // The following two overloads are used in connection with the
258 // ByteSize type (see sizes.hpp). They simplify the use of
259 // ByteSize'd arguments in assembly code. Note that their equivalent
260 // for the optimized build are the member functions with int disp
261 // argument since ByteSize is mapped to an int type in that case.
262 //
263 // Note: DO NOT introduce similar overloaded functions for WordSize
264 // arguments as in the optimized mode, both ByteSize and WordSize
265 // are mapped to the same type and thus the compiler cannot make a
266 // distinction anymore (=> compiler errors).
267
#ifdef ASSERT
  // ByteSize-taking counterparts of the int-disp constructors above.
  // They exist only in ASSERT builds, where ByteSize is a distinct type;
  // in optimized builds ByteSize maps to int and the int overloads apply.
  Address(Register base, ByteSize disp)
    : _base(base),
      _index(noreg),
      _scale(no_scale),
      _disp(in_bytes(disp)) {
  }

  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
    : _base(base),
      _index(index),
      _scale(scale),
      _disp(in_bytes(disp)) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  // As with the int-disp RegisterOrConstant constructor: a constant index is
  // folded into the displacement and the local 'scale' downgrade only serves
  // the assert.
  Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
    : _base (base),
      _index(index.register_or_noreg()),
      _scale(scale),
      _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
    if (!index.is_register()) scale = Address::no_scale;
    assert(!_index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

#endif // ASSERT
296
  // accessors
  bool uses(Register reg) const { return _base == reg || _index == reg; }  // true if reg is the base or the index
  Register base() const { return _base; }
  Register index() const { return _index; }
  ScaleFactor scale() const { return _scale; }
  int disp() const { return _disp; }
303
304 // Convert the raw encoding form into the form expected by the constructor for
305 // Address. An index of 4 (rsp) corresponds to having no index, so convert
306 // that to noreg for the Address constructor.
307 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
308
309 static Address make_array(ArrayAddress);
310
311 private:
  // True if the base register requires a REX prefix (encodings 8-15, r8-r15).
  bool base_needs_rex() const {
    return _base != noreg && _base->encoding() >= 8;
  }

  // True if the index register requires a REX prefix.
  bool index_needs_rex() const {
    return _index != noreg &&_index->encoding() >= 8;
  }
319
320 relocInfo::relocType reloc() const { return _rspec.type(); }
321
322 friend class Assembler;
323 friend class MacroAssembler;
324 friend class LIR_Assembler; // base/index/scale/disp
325 };
326
327 //
328 // AddressLiteral has been split out from Address because operands of this type
329 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
330 // the few instructions that need to deal with address literals are unique and the
331 // MacroAssembler does not have to implement every instruction in the Assembler
332 // in order to search for address literals that may need special handling depending
// on the instruction and the platform. As a small step on the way to merging i486/amd64
334 // directories.
335 //
336 class AddressLiteral {
337 friend class ArrayAddress;
338 RelocationHolder _rspec;
// Typically when we use an AddressLiteral we want to use its rval
666
667 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
668 VexOpcode opc, InstructionAttr *attributes);
669
670 // Helper functions for groups of instructions
671 void emit_arith_b(int op1, int op2, Register dst, int imm8);
672
673 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
674 // Force generation of a 4 byte immediate value even if it fits into 8bit
675 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
676 void emit_arith(int op1, int op2, Register dst, Register src);
677
678 bool emit_compressed_disp_byte(int &disp);
679
680 void emit_operand(Register reg,
681 Register base, Register index, Address::ScaleFactor scale,
682 int disp,
683 RelocationHolder const& rspec,
684 int rip_relative_correction = 0);
685
686 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
687
688 // operands that only take the original 32bit registers
689 void emit_operand32(Register reg, Address adr);
690
691 void emit_operand(XMMRegister reg,
692 Register base, Register index, Address::ScaleFactor scale,
693 int disp,
694 RelocationHolder const& rspec);
695
696 void emit_operand(XMMRegister reg, Address adr);
697
698 void emit_operand(MMXRegister reg, Address adr);
699
700 // workaround gcc (3.2.1-7) bug
701 void emit_operand(Address adr, MMXRegister reg);
702
703
704 // Immediate-to-memory forms
705 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
1534 void orl(Register dst, int32_t imm32);
1535 void orl(Register dst, Address src);
1536 void orl(Register dst, Register src);
1537 void orl(Address dst, Register src);
1538
1539 void orq(Address dst, int32_t imm32);
1540 void orq(Register dst, int32_t imm32);
1541 void orq(Register dst, Address src);
1542 void orq(Register dst, Register src);
1543
1544 // Pack with unsigned saturation
1545 void packuswb(XMMRegister dst, XMMRegister src);
1546 void packuswb(XMMRegister dst, Address src);
1547 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1548
  // Permutation of 64-bit words
1550 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1551 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1552 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1553 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1554
1555 void pause();
1556
1557 // Undefined Instruction
1558 void ud2();
1559
1560 // SSE4.2 string instructions
1561 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1562 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1563
1564 void pcmpeqb(XMMRegister dst, XMMRegister src);
1565 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1566 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1567 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1568 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1569
1570 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1571 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1572
1573 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1600 void pextrb(Address dst, XMMRegister src, int imm8);
1601 // SSE 2 extract
1602 void pextrw(Register dst, XMMRegister src, int imm8);
1603 void pextrw(Address dst, XMMRegister src, int imm8);
1604
1605 // SSE 4.1 insert
1606 void pinsrd(XMMRegister dst, Register src, int imm8);
1607 void pinsrq(XMMRegister dst, Register src, int imm8);
1608 void pinsrd(XMMRegister dst, Address src, int imm8);
1609 void pinsrq(XMMRegister dst, Address src, int imm8);
1610 void pinsrb(XMMRegister dst, Address src, int imm8);
1611 // SSE 2 insert
1612 void pinsrw(XMMRegister dst, Register src, int imm8);
1613 void pinsrw(XMMRegister dst, Address src, int imm8);
1614
1615 // SSE4.1 packed move
1616 void pmovzxbw(XMMRegister dst, XMMRegister src);
1617 void pmovzxbw(XMMRegister dst, Address src);
1618
1619 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1620 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1621
1622 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1623 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1624
1625 #ifndef _LP64 // no 32bit push/pop on amd64
1626 void popl(Address dst);
1627 #endif
1628
1629 #ifdef _LP64
1630 void popq(Address dst);
1631 #endif
1632
1633 void popcntl(Register dst, Address src);
1634 void popcntl(Register dst, Register src);
1635
1636 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1637
1638 #ifdef _LP64
1639 void popcntq(Register dst, Address src);
1640 void popcntq(Register dst, Register src);
1641 #endif
1642
1643 // Prefetches (SSE, SSE2, 3DNOW only)
1644
2004 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2005 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2006 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2007 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2008 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2009 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2010
2011 // Logical shift right packed integers
2012 void psrlw(XMMRegister dst, int shift);
2013 void psrld(XMMRegister dst, int shift);
2014 void psrlq(XMMRegister dst, int shift);
2015 void psrlw(XMMRegister dst, XMMRegister shift);
2016 void psrld(XMMRegister dst, XMMRegister shift);
2017 void psrlq(XMMRegister dst, XMMRegister shift);
2018 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2019 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2020 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2021 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2022 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2023 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2024
2025 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2026 void psraw(XMMRegister dst, int shift);
2027 void psrad(XMMRegister dst, int shift);
2028 void psraw(XMMRegister dst, XMMRegister shift);
2029 void psrad(XMMRegister dst, XMMRegister shift);
2030 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2031 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2032 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2033 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2034
2035 // And packed integers
2036 void pand(XMMRegister dst, XMMRegister src);
2037 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2038 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2039
2040 // Andn packed integers
2041 void pandn(XMMRegister dst, XMMRegister src);
2042
2043 // Or packed integers
2044 void por(XMMRegister dst, XMMRegister src);
2045 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2046 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2047
2048 // Xor packed integers
2049 void pxor(XMMRegister dst, XMMRegister src);
2050 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2051 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2052 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2053 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2054
2055
2056 // vinserti forms
2057 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2058 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2059 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2060 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2061 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2062
2063 // vinsertf forms
2064 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2065 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2066 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2093 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2094 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2095 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2096 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2097 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2098 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2099 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2100 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2101
2102 // scalar single/double precision replicate
2103 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2104 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2105 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2106 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2107
2108 // gpr sourced byte/word/dword/qword replicate
2109 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2110 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2111 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2112 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2113
2114 // Carry-Less Multiplication Quadword
2115 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2116 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2117 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2118 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2119 // to avoid transaction penalty between AVX and SSE states. There is no
2120 // penalty if legacy SSE instructions are encoded using VEX prefix because
2121 // they always clear upper 128 bits. It should be used before calling
2122 // runtime code and native libraries.
2123 void vzeroupper();
2124
  // AVX support for vectorized conditional move (float/double). The following two instructions are only used together.
2126 void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2127 void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2128 void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2129 void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2130 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2131
2132 protected:
|
167 };
168 static ScaleFactor times(int size) {
169 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
170 if (size == 8) return times_8;
171 if (size == 4) return times_4;
172 if (size == 2) return times_2;
173 return times_1;
174 }
175 static int scale_size(ScaleFactor scale) {
176 assert(scale != no_scale, "");
177 assert(((1 << (int)times_1) == 1 &&
178 (1 << (int)times_2) == 2 &&
179 (1 << (int)times_4) == 4 &&
180 (1 << (int)times_8) == 8), "");
181 return (1 << (int)scale);
182 }
183
184 private:
185 Register _base;
186 Register _index;
187 XMMRegister _xmmindex;
188 ScaleFactor _scale;
189 int _disp;
190 bool _isxmmindex;
191 RelocationHolder _rspec;
192
193 // Easily misused constructors make them private
194 // %%% can we make these go away?
195 NOT_LP64(Address(address loc, RelocationHolder spec);)
196 Address(int disp, address loc, relocInfo::relocType rtype);
197 Address(int disp, address loc, RelocationHolder spec);
198
199 public:
200
201 int disp() { return _disp; }
202 // creation
  // Default: an empty/invalid address — no base, no index (GPR or XMM),
  // zero displacement.
  Address()
    : _base(noreg),
      _index(noreg),
      _xmmindex(xnoreg),
      _scale(no_scale),
      _disp(0),
      _isxmmindex(false){
  }
211
212 // No default displacement otherwise Register can be implicitly
213 // converted to 0(Register) which is quite a different animal.
214
215 Address(Register base, int disp)
216 : _base(base),
217 _index(noreg),
218 _xmmindex(xnoreg),
219 _scale(no_scale),
220 _disp(disp),
221 _isxmmindex(false){
222 }
223
224 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
225 : _base (base),
226 _index(index),
227 _xmmindex(xnoreg),
228 _scale(scale),
229 _disp (disp),
230 _isxmmindex(false) {
231 assert(!index->is_valid() == (scale == Address::no_scale),
232 "inconsistent address");
233 }
234
235 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
236 : _base (base),
237 _index(index.register_or_noreg()),
238 _xmmindex(xnoreg),
239 _scale(scale),
240 _disp (disp + (index.constant_or_zero() * scale_size(scale))),
241 _isxmmindex(false){
242 if (!index.is_register()) scale = Address::no_scale;
243 assert(!_index->is_valid() == (scale == Address::no_scale),
244 "inconsistent address");
245 }
246
  // Vector-index (VSIB) form used by gather/scatter: base + xmmindex*scale + disp.
  // The general-purpose index slot is left empty and _isxmmindex is set.
  Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
    : _base (base),
      _index(noreg),
      _xmmindex(index),
      _scale(scale),
      _disp(disp),
      _isxmmindex(true) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }
257
258 Address plus_disp(int disp) const {
259 Address a = (*this);
260 a._disp += disp;
261 return a;
262 }
263 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
264 Address a = (*this);
265 a._disp += disp.constant_or_zero() * scale_size(scale);
266 if (disp.is_register()) {
267 assert(!a.index()->is_valid(), "competing indexes");
268 a._index = disp.as_register();
269 a._scale = scale;
270 }
271 return a;
272 }
273 bool is_same_address(Address a) const {
274 // disregard _rspec
275 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
276 }
277
278 // The following two overloads are used in connection with the
279 // ByteSize type (see sizes.hpp). They simplify the use of
280 // ByteSize'd arguments in assembly code. Note that their equivalent
281 // for the optimized build are the member functions with int disp
282 // argument since ByteSize is mapped to an int type in that case.
283 //
284 // Note: DO NOT introduce similar overloaded functions for WordSize
285 // arguments as in the optimized mode, both ByteSize and WordSize
286 // are mapped to the same type and thus the compiler cannot make a
287 // distinction anymore (=> compiler errors).
288
289 #ifdef ASSERT
290 Address(Register base, ByteSize disp)
291 : _base(base),
292 _index(noreg),
293 _xmmindex(xnoreg),
294 _scale(no_scale),
295 _disp(in_bytes(disp)),
296 _isxmmindex(false){
297 }
298
299 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
300 : _base(base),
301 _index(index),
302 _xmmindex(xnoreg),
303 _scale(scale),
304 _disp(in_bytes(disp)),
305 _isxmmindex(false){
306 assert(!index->is_valid() == (scale == Address::no_scale),
307 "inconsistent address");
308 }
309 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
310 : _base (base),
311 _index(index.register_or_noreg()),
312 _xmmindex(xnoreg),
313 _scale(scale),
314 _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))),
315 _isxmmindex(false) {
316 if (!index.is_register()) scale = Address::no_scale;
317 assert(!_index->is_valid() == (scale == Address::no_scale),
318 "inconsistent address");
319 }
320
321 #endif // ASSERT
322
323 // accessors
324 bool uses(Register reg) const { return _base == reg || _index == reg; }
325 Register base() const { return _base; }
326 Register index() const { return _index; }
327 XMMRegister xmmindex() const { return _xmmindex; }
328 ScaleFactor scale() const { return _scale; }
329 int disp() const { return _disp; }
330 bool isxmmindex() const { return _isxmmindex; }
331
332 // Convert the raw encoding form into the form expected by the constructor for
333 // Address. An index of 4 (rsp) corresponds to having no index, so convert
334 // that to noreg for the Address constructor.
335 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
336
337 static Address make_array(ArrayAddress);
338
339 private:
  // True if the base register requires a REX prefix (encodings 8-15, r8-r15).
  bool base_needs_rex() const {
    return _base != noreg && _base->encoding() >= 8;
  }

  // True if the index register requires a REX prefix.
  bool index_needs_rex() const {
    return _index != noreg &&_index->encoding() >= 8;
  }

  // True if the XMM (VSIB) index register requires a REX prefix (xmm8 and up).
  bool xmmindex_needs_rex() const {
    return _xmmindex != xnoreg && _xmmindex->encoding() >= 8;
  }
351
352 relocInfo::relocType reloc() const { return _rspec.type(); }
353
354 friend class Assembler;
355 friend class MacroAssembler;
356 friend class LIR_Assembler; // base/index/scale/disp
357 };
358
359 //
360 // AddressLiteral has been split out from Address because operands of this type
361 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
362 // the few instructions that need to deal with address literals are unique and the
363 // MacroAssembler does not have to implement every instruction in the Assembler
364 // in order to search for address literals that may need special handling depending
// on the instruction and the platform. As a small step on the way to merging i486/amd64
366 // directories.
367 //
368 class AddressLiteral {
369 friend class ArrayAddress;
370 RelocationHolder _rspec;
// Typically when we use an AddressLiteral we want to use its rval
698
699 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
700 VexOpcode opc, InstructionAttr *attributes);
701
702 // Helper functions for groups of instructions
703 void emit_arith_b(int op1, int op2, Register dst, int imm8);
704
705 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
706 // Force generation of a 4 byte immediate value even if it fits into 8bit
707 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
708 void emit_arith(int op1, int op2, Register dst, Register src);
709
710 bool emit_compressed_disp_byte(int &disp);
711
712 void emit_operand(Register reg,
713 Register base, Register index, Address::ScaleFactor scale,
714 int disp,
715 RelocationHolder const& rspec,
716 int rip_relative_correction = 0);
717
718 void emit_operand(XMMRegister reg, Register base, XMMRegister index,
719 Address::ScaleFactor scale,
720 int disp, RelocationHolder const& rspec);
721
722 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
723
724 // operands that only take the original 32bit registers
725 void emit_operand32(Register reg, Address adr);
726
727 void emit_operand(XMMRegister reg,
728 Register base, Register index, Address::ScaleFactor scale,
729 int disp,
730 RelocationHolder const& rspec);
731
732 void emit_operand(XMMRegister reg, Address adr);
733
734 void emit_operand(MMXRegister reg, Address adr);
735
736 // workaround gcc (3.2.1-7) bug
737 void emit_operand(Address adr, MMXRegister reg);
738
739
740 // Immediate-to-memory forms
741 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
1570 void orl(Register dst, int32_t imm32);
1571 void orl(Register dst, Address src);
1572 void orl(Register dst, Register src);
1573 void orl(Address dst, Register src);
1574
1575 void orq(Address dst, int32_t imm32);
1576 void orq(Register dst, int32_t imm32);
1577 void orq(Register dst, Address src);
1578 void orq(Register dst, Register src);
1579
1580 // Pack with unsigned saturation
1581 void packuswb(XMMRegister dst, XMMRegister src);
1582 void packuswb(XMMRegister dst, Address src);
1583 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1584
  // Permutation of 64-bit words
1586 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1587 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1588 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1589 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1590 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1591
1592 void pause();
1593
1594 // Undefined Instruction
1595 void ud2();
1596
1597 // SSE4.2 string instructions
1598 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1599 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1600
1601 void pcmpeqb(XMMRegister dst, XMMRegister src);
1602 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1603 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1604 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1605 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1606
1607 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1608 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1609
1610 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1637 void pextrb(Address dst, XMMRegister src, int imm8);
1638 // SSE 2 extract
1639 void pextrw(Register dst, XMMRegister src, int imm8);
1640 void pextrw(Address dst, XMMRegister src, int imm8);
1641
1642 // SSE 4.1 insert
1643 void pinsrd(XMMRegister dst, Register src, int imm8);
1644 void pinsrq(XMMRegister dst, Register src, int imm8);
1645 void pinsrd(XMMRegister dst, Address src, int imm8);
1646 void pinsrq(XMMRegister dst, Address src, int imm8);
1647 void pinsrb(XMMRegister dst, Address src, int imm8);
1648 // SSE 2 insert
1649 void pinsrw(XMMRegister dst, Register src, int imm8);
1650 void pinsrw(XMMRegister dst, Address src, int imm8);
1651
1652 // SSE4.1 packed move
1653 void pmovzxbw(XMMRegister dst, XMMRegister src);
1654 void pmovzxbw(XMMRegister dst, Address src);
1655
1656 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1657 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1658 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1659
1660 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1661 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1662
1663 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1664
1665 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1666
1667 #ifndef _LP64 // no 32bit push/pop on amd64
1668 void popl(Address dst);
1669 #endif
1670
1671 #ifdef _LP64
1672 void popq(Address dst);
1673 #endif
1674
1675 void popcntl(Register dst, Address src);
1676 void popcntl(Register dst, Register src);
1677
1678 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1679
1680 #ifdef _LP64
1681 void popcntq(Register dst, Address src);
1682 void popcntq(Register dst, Register src);
1683 #endif
1684
1685 // Prefetches (SSE, SSE2, 3DNOW only)
1686
2046 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2047 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2048 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2049 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2050 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2051 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2052
2053 // Logical shift right packed integers
2054 void psrlw(XMMRegister dst, int shift);
2055 void psrld(XMMRegister dst, int shift);
2056 void psrlq(XMMRegister dst, int shift);
2057 void psrlw(XMMRegister dst, XMMRegister shift);
2058 void psrld(XMMRegister dst, XMMRegister shift);
2059 void psrlq(XMMRegister dst, XMMRegister shift);
2060 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2061 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2062 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2063 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2064 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2065 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2066 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2067 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2068
2069 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2070 void psraw(XMMRegister dst, int shift);
2071 void psrad(XMMRegister dst, int shift);
2072 void psraw(XMMRegister dst, XMMRegister shift);
2073 void psrad(XMMRegister dst, XMMRegister shift);
2074 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2075 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2076 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2077 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2078
2079 // And packed integers
2080 void pand(XMMRegister dst, XMMRegister src);
2081 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2082 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2083 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2084
2085 // Andn packed integers
2086 void pandn(XMMRegister dst, XMMRegister src);
2087
2088 // Or packed integers
2089 void por(XMMRegister dst, XMMRegister src);
2090 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2091 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2092 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2093
2094 // Xor packed integers
2095 void pxor(XMMRegister dst, XMMRegister src);
2096 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2097 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2098 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);   // EVEX-encoded quadword form
2099 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2100
2101
2102 // vinserti forms: insert an integer 128/256-bit lane into dst at the
2102 // lane position selected by imm8; untouched lanes come from nds.
2103 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2104 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2105 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2106 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2107 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2108
2109 // vinsertf forms: floating-point counterparts of the vinserti inserts above
2110 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2111 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2112 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
// byte/word/dword/qword replicate, sourced from an xmm register or memory
// (EVEX-encoded broadcast forms)
2139 void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2140 void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2141 void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2142 void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
2143 void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2144 void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
2145 void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2146 void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
2147
2148 // scalar single/double precision replicate
2149 void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2150 void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
2151 void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2152 void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
2153
2154 // gpr sourced byte/word/dword/qword replicate
2155 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2156 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2157 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2158 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2159
2160 void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
2161
2162 // Carry-Less Multiplication Quadword; mask (imm8) selects which quadword
2162 // halves of the two operands are multiplied
2163 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2164 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2165 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2166 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2167 // to avoid transaction penalty between AVX and SSE states. There is no
2168 // penalty if legacy SSE instructions are encoded using VEX prefix because
2169 // they always clear upper 128 bits. It should be used before calling
2170 // runtime code and native libraries.
2171 void vzeroupper();
2172
2173 // AVX support for vectorized conditional move (float/double). Each
2173 // cmp*/blendv* pair below is used only together: the cmp* produces the
2173 // mask that the matching blendv* consumes.
2174 void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2175 void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2176 void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2177 void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2178 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2179
2180 protected:
|