600 VexSimdPrefix pre, VexOpcode opc,
601 bool vex_w, bool vector256);
602
603 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
604 VexSimdPrefix pre, bool vector256 = false,
605 VexOpcode opc = VEX_OPCODE_0F) {
606 int src_enc = src->encoding();
607 int dst_enc = dst->encoding();
608 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
609 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
610 }
611
612 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
613 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
614 bool rex_w = false, bool vector256 = false);
615
616 void simd_prefix(XMMRegister dst, Address src,
617 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
618 simd_prefix(dst, xnoreg, src, pre, opc);
619 }
620 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
621 simd_prefix(src, dst, pre);
622 }
623 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
624 VexSimdPrefix pre) {
625 bool rex_w = true;
626 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
627 }
628
629
630 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
631 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
632 bool rex_w = false, bool vector256 = false);
633
634 int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
635 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
636 return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
637 }
638
639 // Move/convert 32-bit integer value.
640 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
641 VexSimdPrefix pre) {
642 // It is OK to cast from Register to XMMRegister to pass argument here
643 // since only encoding is used in simd_prefix_and_encode() and number of
644 // Gen and Xmm registers are the same.
645 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
646 }
647 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
648 return simd_prefix_and_encode(dst, xnoreg, src, pre);
649 }
650 int simd_prefix_and_encode(Register dst, XMMRegister src,
651 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
652 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
653 }
654
655 // Move/convert 64-bit integer value.
656 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
657 VexSimdPrefix pre) {
658 bool rex_w = true;
660 }
661 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
662 return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
663 }
664 int simd_prefix_and_encode_q(Register dst, XMMRegister src,
665 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
666 bool rex_w = true;
667 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
668 }
669
670 // Helper functions for groups of instructions
671 void emit_arith_b(int op1, int op2, Register dst, int imm8);
672
673 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
674 // Force generation of a 4 byte immediate value even if it fits into 8bit
675 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
676 // only 32bit??
677 void emit_arith(int op1, int op2, Register dst, jobject obj);
678 void emit_arith(int op1, int op2, Register dst, Register src);
679
680 void emit_operand(Register reg,
681 Register base, Register index, Address::ScaleFactor scale,
682 int disp,
683 RelocationHolder const& rspec,
684 int rip_relative_correction = 0);
685
686 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
687
688 // operands that only take the original 32bit registers
689 void emit_operand32(Register reg, Address adr);
690
691 void emit_operand(XMMRegister reg,
692 Register base, Register index, Address::ScaleFactor scale,
693 int disp,
694 RelocationHolder const& rspec);
695
696 void emit_operand(XMMRegister reg, Address adr);
697
698 void emit_operand(MMXRegister reg, Address adr);
699
874 void addr_nop_8();
875
876 // Add Scalar Double-Precision Floating-Point Values
877 void addsd(XMMRegister dst, Address src);
878 void addsd(XMMRegister dst, XMMRegister src);
879
880 // Add Scalar Single-Precision Floating-Point Values
881 void addss(XMMRegister dst, Address src);
882 void addss(XMMRegister dst, XMMRegister src);
883
884 void andl(Address dst, int32_t imm32);
885 void andl(Register dst, int32_t imm32);
886 void andl(Register dst, Address src);
887 void andl(Register dst, Register src);
888
889 void andq(Address dst, int32_t imm32);
890 void andq(Register dst, int32_t imm32);
891 void andq(Register dst, Address src);
892 void andq(Register dst, Register src);
893
894 // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
895 void andpd(XMMRegister dst, XMMRegister src);
896
897 // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
898 void andps(XMMRegister dst, XMMRegister src);
899
900 void bsfl(Register dst, Register src);
901 void bsrl(Register dst, Register src);
902
903 #ifdef _LP64
904 void bsfq(Register dst, Register src);
905 void bsrq(Register dst, Register src);
906 #endif
907
908 void bswapl(Register reg);
909
910 void bswapq(Register reg);
911
912 void call(Label& L, relocInfo::relocType rtype);
913 void call(Register reg); // push pc; pc <- reg
914 void call(Address adr); // push pc; pc <- adr
915
916 void cdql();
917
918 void cdqq();
919
1419 void popq(Address dst);
1420 #endif
1421
1422 void popcntl(Register dst, Address src);
1423 void popcntl(Register dst, Register src);
1424
1425 #ifdef _LP64
1426 void popcntq(Register dst, Address src);
1427 void popcntq(Register dst, Register src);
1428 #endif
1429
1430 // Prefetches (SSE, SSE2, 3DNOW only)
1431
1432 void prefetchnta(Address src);
1433 void prefetchr(Address src);
1434 void prefetcht0(Address src);
1435 void prefetcht1(Address src);
1436 void prefetcht2(Address src);
1437 void prefetchw(Address src);
1438
1439 // POR - Bitwise logical OR
1440 void por(XMMRegister dst, XMMRegister src);
1441 void por(XMMRegister dst, Address src);
1442
1443 // Shuffle Packed Doublewords
1444 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1445 void pshufd(XMMRegister dst, Address src, int mode);
1446
1447 // Shuffle Packed Low Words
1448 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1449 void pshuflw(XMMRegister dst, Address src, int mode);
1450
1451 // Shift Right by bits Logical Quadword Immediate
1452 void psrlq(XMMRegister dst, int shift);
1453
1454 // Shift Right by bytes Logical DoubleQuadword Immediate
1455 void psrldq(XMMRegister dst, int shift);
1456
1457 // Logical Compare Double Quadword
1458 void ptest(XMMRegister dst, XMMRegister src);
1459 void ptest(XMMRegister dst, Address src);
1460
1461 // Interleave Low Bytes
1462 void punpcklbw(XMMRegister dst, XMMRegister src);
1463 void punpcklbw(XMMRegister dst, Address src);
1464
1465 // Interleave Low Doublewords
1466 void punpckldq(XMMRegister dst, XMMRegister src);
1467 void punpckldq(XMMRegister dst, Address src);
1468
1469 // Interleave Low Quadwords
1470 void punpcklqdq(XMMRegister dst, XMMRegister src);
1471
1472 #ifndef _LP64 // no 32bit push/pop on amd64
1473 void pushl(Address src);
1474 #endif
1475
1476 void pushq(Address src);
1477
1478 // Xor Packed Byte Integer Values
1479 void pxor(XMMRegister dst, Address src);
1480 void pxor(XMMRegister dst, XMMRegister src);
1481
1482 void rcll(Register dst, int imm8);
1483
1484 void rclq(Register dst, int imm8);
1485
1486 void ret(int imm16);
1487
1488 void sahf();
1489
1490 void sarl(Register dst, int imm8);
1491 void sarl(Register dst);
1492
1493 void sarq(Register dst, int imm8);
1494 void sarq(Register dst);
1495
1496 void sbbl(Address dst, int32_t imm32);
1497 void sbbl(Register dst, int32_t imm32);
1498 void sbbl(Register dst, Address src);
1499 void sbbl(Register dst, Register src);
1500
1501 void sbbq(Address dst, int32_t imm32);
1584 void xchgl(Register reg, Address adr);
1585 void xchgl(Register dst, Register src);
1586
1587 void xchgq(Register reg, Address adr);
1588 void xchgq(Register dst, Register src);
1589
1590 // Get Value of Extended Control Register
1591 void xgetbv() {
1592 emit_byte(0x0F);
1593 emit_byte(0x01);
1594 emit_byte(0xD0);
1595 }
1596
1597 void xorl(Register dst, int32_t imm32);
1598 void xorl(Register dst, Address src);
1599 void xorl(Register dst, Register src);
1600
1601 void xorq(Register dst, Address src);
1602 void xorq(Register dst, Register src);
1603
1604 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1605 void xorpd(XMMRegister dst, XMMRegister src);
1606
1607 // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1608 void xorps(XMMRegister dst, XMMRegister src);
1609
1610 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1611
1612 // AVX 3-operands scalar instructions (encoded with VEX prefix)
1613 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
1614 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1615 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
1616 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1617 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1618 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1619 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1620 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1621 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1622 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1623 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1624 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1625 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1626 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1627 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1628 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1629
1630   // AVX Vector instructions.
1631 void vandpd(XMMRegister dst, XMMRegister nds, Address src);
1632 void vandps(XMMRegister dst, XMMRegister nds, Address src);
1633 void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
1634 void vxorps(XMMRegister dst, XMMRegister nds, Address src);
1635 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1636 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1637 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1638 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1639 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1640
1641 // AVX instruction which is used to clear upper 128 bits of YMM registers and
1642 // to avoid transaction penalty between AVX and SSE states. There is no
1643 // penalty if legacy SSE instructions are encoded using VEX prefix because
1644 // they always clear upper 128 bits. It should be used before calling
1645 // runtime code and native libraries.
1646 void vzeroupper();
1647
1648 protected:
1649 // Next instructions require address alignment 16 bytes SSE mode.
1650 // They should be called only from corresponding MacroAssembler instructions.
1651 void andpd(XMMRegister dst, Address src);
1652 void andps(XMMRegister dst, Address src);
1653 void xorpd(XMMRegister dst, Address src);
1654 void xorps(XMMRegister dst, Address src);
1655
1656 };
1657
2511 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
2512 void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
2513 void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
2514 void xorpd(XMMRegister dst, AddressLiteral src);
2515
2516 // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
2517 void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
2518 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
2519 void xorps(XMMRegister dst, AddressLiteral src);
2520
2521 // AVX 3-operands instructions
2522
2523 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
2524 void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
2525 void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2526
2527 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
2528 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
2529 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2530
2531 void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); }
2532 void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2533
2534 void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); }
2535 void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2536
2537 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
2538 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
2539 void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2540
2541 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
2542 void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
2543 void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2544
2545 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
2546 void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
2547 void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2548
2549 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
2550 void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
2551 void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2552
2553 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
2554 void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
2555 void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2556
2557 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
2558 void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
2559 void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2560
2561 // AVX Vector instructions
2562
2563 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
2564 void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
2565 void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2566
2567 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
2568 void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
2569 void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2570
2571 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2572 if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
2573 Assembler::vpxor(dst, nds, src, vector256);
2574 else
2575 Assembler::vxorpd(dst, nds, src, vector256);
2576 }
2577
2578 // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
2579 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2580 if (UseAVX > 1) // vinserti128h is available only in AVX2
2581 Assembler::vinserti128h(dst, nds, src);
2582 else
2583 Assembler::vinsertf128h(dst, nds, src);
2584 }
2585
2586 // Data
2587
2588 void cmov32( Condition cc, Register dst, Address src);
2589 void cmov32( Condition cc, Register dst, Register src);
2590
2591 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
2592
2593 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
|
600 VexSimdPrefix pre, VexOpcode opc,
601 bool vex_w, bool vector256);
602
603 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
604 VexSimdPrefix pre, bool vector256 = false,
605 VexOpcode opc = VEX_OPCODE_0F) {
606 int src_enc = src->encoding();
607 int dst_enc = dst->encoding();
608 int nds_enc = nds->is_valid() ? nds->encoding() : 0;
609 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
610 }
611
612 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
613 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
614 bool rex_w = false, bool vector256 = false);
615
616 void simd_prefix(XMMRegister dst, Address src,
617 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
618 simd_prefix(dst, xnoreg, src, pre, opc);
619 }
620
621 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
622 simd_prefix(src, dst, pre);
623 }
624 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
625 VexSimdPrefix pre) {
626 bool rex_w = true;
627 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
628 }
629
630 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
631 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
632 bool rex_w = false, bool vector256 = false);
633
634 // Move/convert 32-bit integer value.
635 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
636 VexSimdPrefix pre) {
637 // It is OK to cast from Register to XMMRegister to pass argument here
638 // since only encoding is used in simd_prefix_and_encode() and number of
639 // Gen and Xmm registers are the same.
640 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
641 }
642 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
643 return simd_prefix_and_encode(dst, xnoreg, src, pre);
644 }
645 int simd_prefix_and_encode(Register dst, XMMRegister src,
646 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
647 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
648 }
649
650 // Move/convert 64-bit integer value.
651 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
652 VexSimdPrefix pre) {
653 bool rex_w = true;
655 }
656 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
657 return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
658 }
659 int simd_prefix_and_encode_q(Register dst, XMMRegister src,
660 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
661 bool rex_w = true;
662 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
663 }
664
665 // Helper functions for groups of instructions
666 void emit_arith_b(int op1, int op2, Register dst, int imm8);
667
668 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
669 // Force generation of a 4 byte immediate value even if it fits into 8bit
670 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
671 // only 32bit??
672 void emit_arith(int op1, int op2, Register dst, jobject obj);
673 void emit_arith(int op1, int op2, Register dst, Register src);
674
675 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
676 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
677 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
678 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
679 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
680 Address src, VexSimdPrefix pre, bool vector256);
681 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
682 XMMRegister src, VexSimdPrefix pre, bool vector256);
683
684 void emit_operand(Register reg,
685 Register base, Register index, Address::ScaleFactor scale,
686 int disp,
687 RelocationHolder const& rspec,
688 int rip_relative_correction = 0);
689
690 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
691
692 // operands that only take the original 32bit registers
693 void emit_operand32(Register reg, Address adr);
694
695 void emit_operand(XMMRegister reg,
696 Register base, Register index, Address::ScaleFactor scale,
697 int disp,
698 RelocationHolder const& rspec);
699
700 void emit_operand(XMMRegister reg, Address adr);
701
702 void emit_operand(MMXRegister reg, Address adr);
703
878 void addr_nop_8();
879
880 // Add Scalar Double-Precision Floating-Point Values
881 void addsd(XMMRegister dst, Address src);
882 void addsd(XMMRegister dst, XMMRegister src);
883
884 // Add Scalar Single-Precision Floating-Point Values
885 void addss(XMMRegister dst, Address src);
886 void addss(XMMRegister dst, XMMRegister src);
887
888 void andl(Address dst, int32_t imm32);
889 void andl(Register dst, int32_t imm32);
890 void andl(Register dst, Address src);
891 void andl(Register dst, Register src);
892
893 void andq(Address dst, int32_t imm32);
894 void andq(Register dst, int32_t imm32);
895 void andq(Register dst, Address src);
896 void andq(Register dst, Register src);
897
898 void bsfl(Register dst, Register src);
899 void bsrl(Register dst, Register src);
900
901 #ifdef _LP64
902 void bsfq(Register dst, Register src);
903 void bsrq(Register dst, Register src);
904 #endif
905
906 void bswapl(Register reg);
907
908 void bswapq(Register reg);
909
910 void call(Label& L, relocInfo::relocType rtype);
911 void call(Register reg); // push pc; pc <- reg
912 void call(Address adr); // push pc; pc <- adr
913
914 void cdql();
915
916 void cdqq();
917
1417 void popq(Address dst);
1418 #endif
1419
1420 void popcntl(Register dst, Address src);
1421 void popcntl(Register dst, Register src);
1422
1423 #ifdef _LP64
1424 void popcntq(Register dst, Address src);
1425 void popcntq(Register dst, Register src);
1426 #endif
1427
1428 // Prefetches (SSE, SSE2, 3DNOW only)
1429
1430 void prefetchnta(Address src);
1431 void prefetchr(Address src);
1432 void prefetcht0(Address src);
1433 void prefetcht1(Address src);
1434 void prefetcht2(Address src);
1435 void prefetchw(Address src);
1436
1437 // Shuffle Packed Doublewords
1438 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1439 void pshufd(XMMRegister dst, Address src, int mode);
1440
1441 // Shuffle Packed Low Words
1442 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1443 void pshuflw(XMMRegister dst, Address src, int mode);
1444
1445 // Shift Right by bytes Logical DoubleQuadword Immediate
1446 void psrldq(XMMRegister dst, int shift);
1447
1448 // Logical Compare Double Quadword
1449 void ptest(XMMRegister dst, XMMRegister src);
1450 void ptest(XMMRegister dst, Address src);
1451
1452 // Interleave Low Bytes
1453 void punpcklbw(XMMRegister dst, XMMRegister src);
1454 void punpcklbw(XMMRegister dst, Address src);
1455
1456 // Interleave Low Doublewords
1457 void punpckldq(XMMRegister dst, XMMRegister src);
1458 void punpckldq(XMMRegister dst, Address src);
1459
1460 // Interleave Low Quadwords
1461 void punpcklqdq(XMMRegister dst, XMMRegister src);
1462
1463 #ifndef _LP64 // no 32bit push/pop on amd64
1464 void pushl(Address src);
1465 #endif
1466
1467 void pushq(Address src);
1468
1469 void rcll(Register dst, int imm8);
1470
1471 void rclq(Register dst, int imm8);
1472
1473 void ret(int imm16);
1474
1475 void sahf();
1476
1477 void sarl(Register dst, int imm8);
1478 void sarl(Register dst);
1479
1480 void sarq(Register dst, int imm8);
1481 void sarq(Register dst);
1482
1483 void sbbl(Address dst, int32_t imm32);
1484 void sbbl(Register dst, int32_t imm32);
1485 void sbbl(Register dst, Address src);
1486 void sbbl(Register dst, Register src);
1487
1488 void sbbq(Address dst, int32_t imm32);
1571 void xchgl(Register reg, Address adr);
1572 void xchgl(Register dst, Register src);
1573
1574 void xchgq(Register reg, Address adr);
1575 void xchgq(Register dst, Register src);
1576
1577 // Get Value of Extended Control Register
1578 void xgetbv() {
1579 emit_byte(0x0F);
1580 emit_byte(0x01);
1581 emit_byte(0xD0);
1582 }
1583
1584 void xorl(Register dst, int32_t imm32);
1585 void xorl(Register dst, Address src);
1586 void xorl(Register dst, Register src);
1587
1588 void xorq(Register dst, Address src);
1589 void xorq(Register dst, Register src);
1590
1591 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1592
1593 // AVX 3-operands scalar instructions (encoded with VEX prefix)
1594
1595 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
1596 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1597 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
1598 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1599 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1600 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1601 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1602 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1603 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1604 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1605 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1606 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1607 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1608 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1609 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1610 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1611
1612
1613 //====================VECTOR ARITHMETIC=====================================
1614
1615 // Add Packed Floating-Point Values
1616 void addpd(XMMRegister dst, XMMRegister src);
1617 void addps(XMMRegister dst, XMMRegister src);
1618 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1619 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1620 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1621 void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1622
1623 // Subtract Packed Floating-Point Values
1624 void subpd(XMMRegister dst, XMMRegister src);
1625 void subps(XMMRegister dst, XMMRegister src);
1626 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1627 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1628 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1629 void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1630
1631 // Multiply Packed Floating-Point Values
1632 void mulpd(XMMRegister dst, XMMRegister src);
1633 void mulps(XMMRegister dst, XMMRegister src);
1634 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1635 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1636 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1637 void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1638
1639 // Divide Packed Floating-Point Values
1640 void divpd(XMMRegister dst, XMMRegister src);
1641 void divps(XMMRegister dst, XMMRegister src);
1642 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1643 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1644 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1645 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1646
1647 // Bitwise Logical AND of Packed Floating-Point Values
1648 void andpd(XMMRegister dst, XMMRegister src);
1649 void andps(XMMRegister dst, XMMRegister src);
1650 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1651 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1652 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1653 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1654
1655 // Bitwise Logical XOR of Packed Floating-Point Values
1656 void xorpd(XMMRegister dst, XMMRegister src);
1657 void xorps(XMMRegister dst, XMMRegister src);
1658 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1659 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1660 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1661 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1662
1663 // Add packed integers
1664 void paddb(XMMRegister dst, XMMRegister src);
1665 void paddw(XMMRegister dst, XMMRegister src);
1666 void paddd(XMMRegister dst, XMMRegister src);
1667 void paddq(XMMRegister dst, XMMRegister src);
1668 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1669 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1670 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1671 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1672 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1673 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1674 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1675 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1676
1677 // Sub packed integers
1678 void psubb(XMMRegister dst, XMMRegister src);
1679 void psubw(XMMRegister dst, XMMRegister src);
1680 void psubd(XMMRegister dst, XMMRegister src);
1681 void psubq(XMMRegister dst, XMMRegister src);
1682 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1683 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1684 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1685 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1686 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1687 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1688 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1689 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1690
1691 // Multiply packed integers (only shorts and ints)
1692 void pmullw(XMMRegister dst, XMMRegister src);
1693 void pmulld(XMMRegister dst, XMMRegister src);
1694 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1695 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1696 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1697 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1698
  // Shift left packed integers (w/d/q = 16/32/64-bit elements).
  // The shift count is either an immediate or taken from an XMM register.
  void psllw(XMMRegister dst, int shift);
  void pslld(XMMRegister dst, int shift);
  void psllq(XMMRegister dst, int shift);
  void psllw(XMMRegister dst, XMMRegister shift);
  void pslld(XMMRegister dst, XMMRegister shift);
  void psllq(XMMRegister dst, XMMRegister shift);
  // AVX non-destructive forms: dst = src << shift.
  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1712
  // Logical (zero-filling) shift right packed integers.
  void psrlw(XMMRegister dst, int shift);
  void psrld(XMMRegister dst, int shift);
  void psrlq(XMMRegister dst, int shift);
  void psrlw(XMMRegister dst, XMMRegister shift);
  void psrld(XMMRegister dst, XMMRegister shift);
  void psrlq(XMMRegister dst, XMMRegister shift);
  // AVX non-destructive forms: dst = src >>> shift.
  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1726
  // Arithmetic (sign-extending) shift right packed integers
  // (only shorts and ints, no instructions for longs).
  void psraw(XMMRegister dst, int shift);
  void psrad(XMMRegister dst, int shift);
  void psraw(XMMRegister dst, XMMRegister shift);
  void psrad(XMMRegister dst, XMMRegister shift);
  // AVX non-destructive forms: dst = src >> shift.
  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1736
  // And packed integers (AVX forms are three-operand: dst = nds & src).
  void pand(XMMRegister dst, XMMRegister src);
  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);

  // Or packed integers (dst = nds | src).
  void por(XMMRegister dst, XMMRegister src);
  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);

  // Xor packed integers (dst = nds ^ src).
  void pxor(XMMRegister dst, XMMRegister src);
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1751
  // Copy low 128bit into high 128bit of YMM registers.
  // vinsertf128h is the AVX1 (float-typed) form; vinserti128h is the
  // AVX2 (integer-typed) form. Both move the same 128 bits of data.
  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1755
  // AVX instruction which is used to clear upper 128 bits of YMM registers and
  // to avoid transition penalty between AVX and SSE states. There is no
  // penalty if legacy SSE instructions are encoded using VEX prefix because
  // they always clear upper 128 bits. It should be used before calling
  // runtime code and native libraries.
  void vzeroupper();
1762
 protected:
  // The next instructions require 16-byte address alignment in SSE mode.
  // They should be called only from corresponding MacroAssembler instructions.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);
1770
1771 };
1772
  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values.
  // Register and plain-Address forms forward to Assembler; the
  // AddressLiteral overload (no body here) is defined out-of-line.
  void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); }
  void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
  void xorpd(XMMRegister dst, AddressLiteral src);

  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
  void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); }
  void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
  void xorps(XMMRegister dst, AddressLiteral src);
2635
  // AVX 3-operands instructions
  // Each group: register and Address forms forward straight to the
  // Assembler base; the AddressLiteral overloads are defined out-of-line.

  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);

  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);

  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);

  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
2677
  // AVX Vector instructions
  // Register and Address forms forward to Assembler; AddressLiteral
  // overloads are defined out-of-line.

  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);

  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
2687
2688 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2689 if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
2690 Assembler::vpxor(dst, nds, src, vector256);
2691 else
2692 Assembler::vxorpd(dst, nds, src, vector256);
2693 }
2694 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2695 if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
2696 Assembler::vpxor(dst, nds, src, vector256);
2697 else
2698 Assembler::vxorpd(dst, nds, src, vector256);
2699 }
2700
2701 // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
2702 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2703 if (UseAVX > 1) // vinserti128h is available only in AVX2
2704 Assembler::vinserti128h(dst, nds, src);
2705 else
2706 Assembler::vinsertf128h(dst, nds, src);
2707 }
2708
  // Data

  // Conditional 32-bit move (defined out-of-line).
  void cmov32( Condition cc, Register dst, Address src);
  void cmov32( Condition cc, Register dst, Register src);

  // Conditional move of a pointer-sized value.
  void cmov(   Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }

  // Pointer-sized cmov: cmovq on 64-bit VMs, cmov32 on 32-bit VMs.
  void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
|