  EVEX_8bit  = 0,
  EVEX_16bit = 1,
  EVEX_32bit = 2,
  EVEX_64bit = 3,
  EVEX_NObit = 4
};

enum WhichOperand {
  // input to locate_operand, and format code for relocations
  imm_operand    = 0,   // embedded 32-bit|64-bit immediate operand
  disp32_operand = 1,   // embedded 32-bit displacement or address
  call32_operand = 2,   // embedded 32-bit self-relative displacement
#ifndef _LP64
  _WhichOperand_limit = 3
#else
  narrow_oop_operand  = 3,   // embedded 32-bit immediate narrow oop
  _WhichOperand_limit = 4
#endif
};

enum ComparisonPredicate {
  eq = 0,
  lt = 1,
  le = 2,
  _false = 3,
  neq = 4,
  nlt = 5,
  nle = 6,
  _true = 7
};
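
// Illustrative sketch (assumed usage, not from this file): a ComparisonPredicate
// is passed straight through as the EVEX comparison immediate, e.g., with the
// usual masm shorthand:
//   __ evpcmpuw(k1, xmm0, xmm1, Assembler::le, Assembler::AVX_512bit);
// sets bit i of k1 to (xmm0.word[i] <= xmm1.word[i]), compared as unsigned.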


// NOTE: The general philosophy of the declarations here is that 64bit versions
// of instructions are freely declared without the need for wrapping them in an ifdef.
// (Some dangerous instructions are ifdef'd out of inappropriate jvms.)
// In the .cpp file the implementations are wrapped so that they are dropped out
// of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
// to the size it was prior to merging up the 32bit and 64bit assemblers.
//
// This does mean you'll get a linker/runtime error if you use a 64bit-only instruction
// in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
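//
// Illustrative sketch of that scheme (assumed, not the actual definition): in
// the .cpp file a 64bit-only implementation is guarded so 32bit builds drop it:
//
//   #ifdef _LP64
//   void Assembler::popcntq(Register dst, Register src) {
//     ... // emit the REX.W-prefixed encoding
//   }
//   #endif // _LP64
//
// The unconditional declaration in this header then resolves only in 64bit builds.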

 private:

  bool _legacy_mode_bw;
  bool _legacy_mode_dq;
  bool _legacy_mode_vl;
  bool _legacy_mode_vlbw;
  bool _is_managed;
  bool _vector_masking;   // For stub code use only

    _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
    _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
    _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
    _is_managed = false;
    _vector_masking = false;
    _attributes = NULL;
  }

  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
  void clear_attributes(void) { _attributes = NULL; }

  void set_managed(void) { _is_managed = true; }
  void clear_managed(void) { _is_managed = false; }
  bool is_managed(void) { return _is_managed; }

  // Following functions are for stub code use only
  void set_vector_masking(void) { _vector_masking = true; }
  void clear_vector_masking(void) { _vector_masking = false; }
  bool is_vector_masking(void) { return _vector_masking; }

  void lea(Register dst, Address src);

  void mov(Register dst, Register src);

  void pusha();
  void popa();

  void pushf();
  void popf();

  void push(int32_t imm32);

  void push(Register src);

  void pop(Register dst);

  // These are dummies to prevent surprise implicit conversions to Register
  void push(void* v);
  void pop(void* v);
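  // Illustrative note (assumption about the mechanism): Register is a pointer
  // type, so NULL or other pointer-ish arguments could otherwise convert to it
  // and quietly encode a register push. With these undefined void* overloads
  // such a call is instead ambiguous at compile time or unresolved at link
  // time, e.g.
  //   __ push(some_ptr);   // hypothetical: resolves to push(void*) -> link error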

  void kmovbl(KRegister dst, Register src);
  void kmovbl(Register dst, KRegister src);
  void kmovwl(KRegister dst, Register src);
  void kmovwl(KRegister dst, Address src);
  void kmovwl(Register dst, KRegister src);
  void kmovdl(KRegister dst, Register src);
  void kmovdl(Register dst, KRegister src);
  void kmovql(KRegister dst, KRegister src);
  void kmovql(Address dst, KRegister src);
  void kmovql(KRegister dst, Address src);
  void kmovql(KRegister dst, Register src);
  void kmovql(Register dst, KRegister src);

  void knotwl(KRegister dst, KRegister src);

  void kortestbl(KRegister dst, KRegister src);
  void kortestwl(KRegister dst, KRegister src);
  void kortestdl(KRegister dst, KRegister src);
  void kortestql(KRegister dst, KRegister src);

  void ktestq(KRegister src1, KRegister src2);
  void ktestd(KRegister src1, KRegister src2);

  void ktestql(KRegister dst, KRegister src);

  void movdl(XMMRegister dst, Register src);
  void movdl(Register dst, XMMRegister src);
  void movdl(XMMRegister dst, Address src);
  void movdl(Address dst, XMMRegister src);

  // Move Double Quadword
  void movdq(XMMRegister dst, Register src);
  void movdq(Register dst, XMMRegister src);

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqa(XMMRegister dst, Address src);

  // Move Unaligned Double Quadword
  void movdqu(Address dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 256bit Vector
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 512bit Vector
  void evmovdqub(Address dst, XMMRegister src, int vector_len);
  void evmovdqub(XMMRegister dst, Address src, int vector_len);
  void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evmovdquw(Address dst, XMMRegister src, int vector_len);
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
  void evmovdquw(XMMRegister dst, Address src, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
  void evmovdqul(Address dst, XMMRegister src, int vector_len);
  void evmovdqul(XMMRegister dst, Address src, int vector_len);
  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdquq(Address dst, XMMRegister src, int vector_len);
  void evmovdquq(XMMRegister dst, Address src, int vector_len);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);

  // Move lower 64bit to high 64bit in 128bit register
  void movlhps(XMMRegister dst, XMMRegister src);

  void movl(Register dst, int32_t imm32);
  void movl(Address dst, int32_t imm32);
  void movl(Register dst, Register src);
  void movl(Register dst, Address src);
  void movl(Address dst, Register src);

  // This dummy prevents movl from converting a zero (like NULL) into a Register
  // by giving the compiler two choices it can't resolve

  void movl(Address dst, void* junk);

  // Pack with unsigned saturation
  void packuswb(XMMRegister dst, XMMRegister src);
  void packuswb(XMMRegister dst, Address src);
  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Permutation of 64bit words
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
  void vpermq(XMMRegister dst, XMMRegister src, int imm8);
  void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);

  void pause();

  // SSE4.2 string instructions
  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);

  void pcmpeqb(XMMRegister dst, XMMRegister src);
  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
  void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

  void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
  void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
  void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

  void pcmpeqw(XMMRegister dst, XMMRegister src);
  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void pcmpeqd(XMMRegister dst, XMMRegister src);
  void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void pcmpeqq(XMMRegister dst, XMMRegister src);
  void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

  void pmovmskb(Register dst, XMMRegister src);
  void vpmovmskb(Register dst, XMMRegister src);

  // SSE 4.1 extract
  void pextrq(Address dst, XMMRegister src, int imm8);
  void pextrb(Address dst, XMMRegister src, int imm8);
  // SSE 2 extract
  void pextrw(Register dst, XMMRegister src, int imm8);
  void pextrw(Address dst, XMMRegister src, int imm8);

  // SSE 4.1 insert
  void pinsrd(XMMRegister dst, Register src, int imm8);
  void pinsrq(XMMRegister dst, Register src, int imm8);
  void pinsrd(XMMRegister dst, Address src, int imm8);
  void pinsrq(XMMRegister dst, Address src, int imm8);
  void pinsrb(XMMRegister dst, Address src, int imm8);
  // SSE 2 insert
  void pinsrw(XMMRegister dst, Register src, int imm8);
  void pinsrw(XMMRegister dst, Address src, int imm8);

  // SSE4.1 packed move
  void pmovzxbw(XMMRegister dst, XMMRegister src);
  void pmovzxbw(XMMRegister dst, Address src);

  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
  void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);

  void evpmovwb(Address dst, XMMRegister src, int vector_len);
  void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);

#ifndef _LP64 // no 32bit push/pop on amd64
  void popl(Address dst);
#endif

#ifdef _LP64
  void popq(Address dst);
#endif

  void popcntl(Register dst, Address src);
  void popcntl(Register dst, Register src);

#ifdef _LP64
  void popcntq(Register dst, Address src);
  void popcntq(Register dst, Register src);
#endif

  // Prefetches (SSE, SSE2, 3DNOW only)

  void prefetchnta(Address src);

  // AVX 3-operand scalar instructions (encoded with VEX prefix)

  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);

  void shlxl(Register dst, Register src1, Register src2);
  void shlxq(Register dst, Register src1, Register src2);

  //====================VECTOR ARITHMETIC=====================================

  // Add Packed Floating-Point Values
  void addpd(XMMRegister dst, XMMRegister src);
  void addpd(XMMRegister dst, Address src);
  void addps(XMMRegister dst, XMMRegister src);
  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Subtract Packed Floating-Point Values
  void subpd(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, XMMRegister src);
  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // gpr sourced byte/word/dword/qword replicate
  void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
  void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

  // Carry-Less Multiplication Quadword
  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);

  // AVX instruction which is used to clear the upper 128 bits of YMM registers and
  // to avoid the transition penalty between AVX and SSE states. There is no
  // penalty if legacy SSE instructions are encoded using the VEX prefix because
  // they always clear the upper 128 bits. It should be used before calling
  // runtime code and native libraries.
  void vzeroupper();
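
  // Illustrative use (a sketch, not from this file): code that may have dirtied
  // 256-bit YMM state typically emits vzeroupper just before a runtime call,
  // e.g. with a MacroAssembler and a hypothetical native function fn:
  //   __ vzeroupper();
  //   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, fn)));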

  // AVX support for vectorized conditional move (double). The following two
  // instructions are only ever used together.
  void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
  void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
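  // Sketch of the coupled pattern (assumed usage; register names are placeholders):
  //   __ cmppd(xmm_mask, xmm_a, xmm_b, cop, vlen);               // per-lane all-ones/all-zeros mask
  //   __ vpblendd(xmm_dst, xmm_else, xmm_then, xmm_mask, vlen);  // select lanes under the mask
  // cmppd builds the predicate mask; vpblendd then picks each lane from one of
  // its two sources according to that mask.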

 protected:
  // The next instructions require 16-byte address alignment in SSE mode.
  // They should be called only from the corresponding MacroAssembler instructions.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);

};

// The Intel x86/AMD64 assembler attributes: all fields enclosed here guide
// encoding-level decisions. The specific set functions are for specialized use;
// otherwise the defaults, or whatever was supplied at object construction, apply.
class InstructionAttr {
 public:
  InstructionAttr(
    int vector_len,     // The length of vector to be applied in encoding - for both AVX and EVEX
    bool rex_vex_w,     // Width of data: false for 32 bits or less, true for 64-bit or specially defined
    bool legacy_mode,   // If true, the instruction is encoded as AVX or earlier (legacy); if false, EVEX encoding may be used