
src/cpu/x86/vm/assembler_x86.hpp


 421 
 422 // x86 can do array addressing as a single operation since disp can be an absolute
 423 // address; amd64 can't. We create a class that expresses the concept but does extra
 424 // magic on amd64 to get the final result.
 425 
 426 class ArrayAddress VALUE_OBJ_CLASS_SPEC {
 427   private:
 428 
 429   AddressLiteral _base;
 430   Address        _index;
 431 
 432   public:
 433 
 434   ArrayAddress() {};
 435   ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
 436   AddressLiteral base() { return _base; }
 437   Address index() { return _index; }
 438 
 439 };
 440 
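For context, a minimal usage sketch (mine, not part of the file): building an ArrayAddress from an AddressLiteral base and a scaled-index Address. The 'jump_table' symbol and the helper name are hypothetical; ExternalAddress, noreg and Address::times_ptr are the usual HotSpot helpers.

  // Hedged sketch: index into a hypothetical pointer-sized jump table.
  static ArrayAddress table_entry_sketch(address jump_table, Register idx) {
    return ArrayAddress(ExternalAddress(jump_table),              // absolute base
                        Address(noreg, idx, Address::times_ptr)); // scaled index, no base reg
  }

On 32-bit x86 the base can be folded straight into the displacement; on amd64 the MacroAssembler presumably materializes the base in a scratch register first, which is the "extra magic" the comment above refers to.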


 441 // 64-bit reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes.
 442 // See the fxsave and xsave (EVEX enabled) documentation for the layout.
 443 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
 444 
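As a worked check of the arithmetic behind that constant (my note, not part of the file): the LP64 numerator is the 512-byte fxsave image plus the 2176-byte EVEX xsave extension.

  // Sketch only: the numbers behind the LP64 value above.
  enum FpuSaveAreaBytes {
    fxsave_bytes     = 512,                              // legacy FXSAVE image
    evex_xsave_bytes = 2176,                             // additional xsave area with EVEX
    fpu_total_bytes  = fxsave_bytes + evex_xsave_bytes   // 2688, divided by wordSize above
  };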
 445 // The Intel x86/AMD64 Assembler: a pure assembler doing NO optimizations on the instruction
 446 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
 447 // is what you get. The Assembler generates code into a CodeBuffer.
 448 
 449 class Assembler : public AbstractAssembler  {
 450   friend class AbstractAssembler; // for the non-virtual hack
 451   friend class LIR_Assembler; // as_Address()
 452   friend class StubGenerator;
 453 
 454  public:
 455   enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
 456     zero          = 0x4,
 457     notZero       = 0x5,
 458     equal         = 0x4,
 459     notEqual      = 0x5,
 460     less          = 0xc,


 551     EVEX_FV   = 0,
 552     EVEX_HV   = 4,
 553     EVEX_FVM  = 6,
 554     EVEX_T1S  = 7,
 555     EVEX_T1F  = 11,
 556     EVEX_T2   = 13,
 557     EVEX_T4   = 15,
 558     EVEX_T8   = 17,
 559     EVEX_HVM  = 18,
 560     EVEX_QVM  = 19,
 561     EVEX_OVM  = 20,
 562     EVEX_M128 = 21,
 563     EVEX_DUP  = 22,
 564     EVEX_ETUP = 23
 565   };
 566 
 567   enum EvexInputSizeInBits {
 568     EVEX_8bit  = 0,
 569     EVEX_16bit = 1,
 570     EVEX_32bit = 2,
 571     EVEX_64bit = 3

 572   };
 573 
 574   enum WhichOperand {
 575     // input to locate_operand, and format code for relocations
 576     imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
 577     disp32_operand = 1,          // embedded 32-bit displacement or address
 578     call32_operand = 2,          // embedded 32-bit self-relative displacement
 579 #ifndef _LP64
 580     _WhichOperand_limit = 3
 581 #else
 582      narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
 583     _WhichOperand_limit = 4
 584 #endif
 585   };
 586 
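For orientation, a hedged sketch (the helper name is made up) of how these format codes are consumed through locate_operand(), declared further down; 'address' is HotSpot's usual byte-pointer typedef.

  // Hypothetical helper: fetch the embedded 32-bit displacement of an
  // already emitted instruction.
  static int32_t read_disp32_sketch(address inst) {
    address operand = Assembler::locate_operand(inst, Assembler::disp32_operand);
    return *(int32_t*) operand;
  }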
 587 
 588 
 589   // NOTE: The general philosophy of the declarations here is that 64bit versions
 590   // of instructions are freely declared without the need for wrapping them in an ifdef.
 591   // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
 592   // In the .cpp file the implementations are wrapped so that they are dropped out
 593   // of the resulting JVM. This is done mostly to keep the footprint of MINIMAL
 594   // to the size it was prior to merging up the 32bit and 64bit assemblers.
 595   //
 596   // This does mean you'll get a linker/runtime error if you use a 64bit-only instruction
 597   // in a 32bit VM. This is somewhat unfortunate but keeps the ifdef noise down.
 598 
 599 private:
 600 
 601   int _evex_encoding;
 602   int _input_size_in_bits;
 603   int _avx_vector_len;
 604   int _tuple_type;
 605   bool _is_evex_instruction;
 606   bool _legacy_mode_bw;
 607   bool _legacy_mode_dq;
 608   bool _legacy_mode_vl;
 609   bool _legacy_mode_vlbw;
 610   bool _instruction_uses_vl;

 611 
 612   // 64bit prefixes
 613   int prefix_and_encode(int reg_enc, bool byteinst = false);
 614   int prefixq_and_encode(int reg_enc);
 615 
 616   int prefix_and_encode(int dst_enc, int src_enc) {
 617     return prefix_and_encode(dst_enc, false, src_enc, false);
 618   }
 619   int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
 620   int prefixq_and_encode(int dst_enc, int src_enc);
 621 
 622   void prefix(Register reg);
 623   void prefix(Register dst, Register src, Prefix p);
 624   void prefix(Register dst, Address adr, Prefix p);
 625   void prefix(Address adr);
 626   void prefixq(Address adr);
 627 
 628   void prefix(Address adr, Register reg,  bool byteinst = false);
 629   void prefix(Address adr, XMMRegister reg);
 630   void prefixq(Address adr, Register reg);
 631   void prefixq(Address adr, XMMRegister reg);
 632 
 633   void prefetch_prefix(Address src);
 634 
 635   void rex_prefix(Address adr, XMMRegister xreg,
 636                   VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 637   int  rex_prefix_and_encode(int dst_enc, int src_enc,
 638                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 639 
 640   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
 641                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
 642                   int vector_len);
 643 
 644   void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
 645                    int nds_enc, VexSimdPrefix pre, VexOpcode opc,
 646                    bool is_extended_context, bool is_merge_context,
 647                    int vector_len, bool no_mask_reg );
 648 
 649   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
 650                   VexSimdPrefix pre, VexOpcode opc,
 651                   bool vex_w, int vector_len,
 652                   bool legacy_mode = false, bool no_mask_reg = false);
 653 
 654   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
 655                   VexSimdPrefix pre, int vector_len = AVX_128bit,
 656                   bool no_mask_reg = false, bool legacy_mode = false) {
 657     int dst_enc = dst->encoding();
 658     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
 659     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
 660   }
 661 
 662   void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
 663                     VexSimdPrefix pre, int vector_len = AVX_128bit,
 664                     bool no_mask_reg = false) {
 665     int dst_enc = dst->encoding();
 666     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
 667     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
 668   }
 669 
 670   void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
 671     bool vex_w = false;
 672     int vector_len = AVX_128bit;
 673     vex_prefix(src, nds->encoding(), dst->encoding(),
 674                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
 675                vector_len, no_mask_reg);
 676   }
 677 
 678   void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
 679     bool vex_w = false;
 680     int vector_len = AVX_128bit;
 681     vex_prefix(src, nds->encoding(), dst->encoding(),
 682                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
 683                vector_len, true, no_mask_reg);
 684   }
 685 
 686   void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
 687     bool vex_w = true;
 688     int vector_len = AVX_128bit;
 689     vex_prefix(src, nds->encoding(), dst->encoding(),
 690                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
 691                vector_len, no_mask_reg);
 692   }
 693 
 694   void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
 695     bool vex_w = true;
 696     int vector_len = AVX_128bit;
 697     vex_prefix(src, nds->encoding(), dst->encoding(),
 698                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
 699                vector_len, true, no_mask_reg);
 700   }
 701 
 702   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
 703                              VexSimdPrefix pre, VexOpcode opc,
 704                              bool vex_w, int vector_len,
 705                              bool legacy_mode, bool no_mask_reg);
 706 
 707   int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
 708     bool vex_w = false;
 709     int vector_len = AVX_128bit;
 710     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
 711                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
 712                                  false, no_mask_reg);
 713   }
 714 
 715   int  vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
 716     bool vex_w = false;
 717     int vector_len = AVX_128bit;
 718     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
 719       VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
 720       true, no_mask_reg);
 721   }
 722 
 723   int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
 724     bool vex_w = true;
 725     int vector_len = AVX_128bit;
 726     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
 727                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
 728                                  false, no_mask_reg);
 729   }
 730 
 731   int  vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
 732     bool vex_w = true;
 733     int vector_len = AVX_128bit;
 734     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
 735                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
 736                                  true, no_mask_reg);
 737   }
 738 
 739   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
 740                              VexSimdPrefix pre, int vector_len = AVX_128bit,
 741                              VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
 742                              bool no_mask_reg = false) {
 743     int src_enc = src->encoding();
 744     int dst_enc = dst->encoding();
 745     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
 746     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
 747   }
 748 
 749   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
 750                    VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
 751                    bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
 752 
 753   void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
 754                    bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
 755     simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
 756   }
 757 
 758   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
 759     simd_prefix(src, dst, pre, no_mask_reg);
 760   }
 761   void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
 762                      VexSimdPrefix pre, bool no_mask_reg = false) {
 763     bool rex_w = true;
 764     simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
 765   }
 766 
 767   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
 768                              VexSimdPrefix pre, bool no_mask_reg,
 769                              VexOpcode opc = VEX_OPCODE_0F,
 770                              bool rex_w = false, int vector_len = AVX_128bit,
 771                              bool legacy_mode = false);
 772 
 773   int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
 774                              VexSimdPrefix pre, bool no_mask_reg,
 775                              VexOpcode opc = VEX_OPCODE_0F,
 776                              bool rex_w = false, int vector_len = AVX_128bit);
 777 
 778   int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
 779                              VexSimdPrefix pre, bool no_mask_reg,
 780                              VexOpcode opc = VEX_OPCODE_0F,
 781                              bool rex_w = false, int vector_len = AVX_128bit);
 782 
 783   // Move/convert 32-bit integer value.
 784   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
 785                              VexSimdPrefix pre, bool no_mask_reg) {
 786     // It is OK to cast from Register to XMMRegister to pass the argument here
 787     // since only the encoding is used in simd_prefix_and_encode() and the number of
 788     // general-purpose and XMM registers is the same.
 789     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
 790   }
 791   int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
 792     return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
 793   }
 794   int simd_prefix_and_encode(Register dst, XMMRegister src,
 795                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
 796                              bool no_mask_reg = false) {
 797     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
 798   }
 799 
 800   // Move/convert 64-bit integer value.
 801   int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
 802                                VexSimdPrefix pre, bool no_mask_reg = false) {
 803     bool rex_w = true;
 804     return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
 805   }
 806   int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
 807     return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
 808   }
 809   int simd_prefix_and_encode_q(Register dst, XMMRegister src,
 810                                VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
 811                                bool no_mask_reg = false) {
 812     bool rex_w = true;
 813     return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
 814   }
 815 
 816   // Helper functions for groups of instructions
 817   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 818 
 819   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 820   // Force generation of a 4-byte immediate value even if it fits into 8 bits
 821   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
 822   void emit_arith(int op1, int op2, Register dst, Register src);
 823 
 824   void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
 825   void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
 826   void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
 827   void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
 828   void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
 829   void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
 830   void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
 831   void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
 832   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
 833                       Address src, VexSimdPrefix pre, int vector_len,
 834                       bool no_mask_reg = false, bool legacy_mode = false);
 835   void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
 836                         Address src, VexSimdPrefix pre, int vector_len,
 837                         bool no_mask_reg = false);
 838   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
 839                       XMMRegister src, VexSimdPrefix pre, int vector_len,
 840                       bool no_mask_reg = false, bool legacy_mode = false);
 841   void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
 842                         XMMRegister src, VexSimdPrefix pre, int vector_len,
 843                         bool no_mask_reg = false);
 844 
 845   bool emit_compressed_disp_byte(int &disp);
 846 
 847   void emit_operand(Register reg,
 848                     Register base, Register index, Address::ScaleFactor scale,
 849                     int disp,
 850                     RelocationHolder const& rspec,
 851                     int rip_relative_correction = 0);
 852 
 853   void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 854 
 855   // operands that only take the original 32bit registers
 856   void emit_operand32(Register reg, Address adr);
 857 
 858   void emit_operand(XMMRegister reg,
 859                     Register base, Register index, Address::ScaleFactor scale,
 860                     int disp,
 861                     RelocationHolder const& rspec);
 862 
 863   void emit_operand(XMMRegister reg, Address adr);
 864 


 969 
 970   // Creation
 971   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 972     init_attributes();
 973   }
 974 
 975   // Decoding
 976   static address locate_operand(address inst, WhichOperand which);
 977   static address locate_next_instruction(address inst);
 978 
 979   // Utilities
 980   static bool is_polling_page_far() NOT_LP64({ return false;});
 981   static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 982                                          int cur_tuple_type, int in_size_in_bits, int cur_encoding);
 983 
 984   // Generic instructions
 985   // Does 32bit or 64bit as needed for the platform. In some sense these
 986   // belong in the macro assembler, but there is no need for both varieties to exist.
 987 
 988   void init_attributes(void) {
 989     _evex_encoding = 0;
 990     _input_size_in_bits = 0;
 991     _avx_vector_len = AVX_NoVec;
 992     _tuple_type = EVEX_ETUP;
 993     _is_evex_instruction = false;
 994     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
 995     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
 996     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
 997     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
 998     _instruction_uses_vl = false;
 999   }
1000 



1001   void lea(Register dst, Address src);
1002 
1003   void mov(Register dst, Register src);
1004 
1005   void pusha();
1006   void popa();
1007 
1008   void pushf();
1009   void popf();
1010 
1011   void push(int32_t imm32);
1012 
1013   void push(Register src);
1014 
1015   void pop(Register dst);
1016 
1017   // These are dummies to prevent surprise implicit conversions to Register
1018   void push(void* v);
1019   void pop(void* v);
1020 


2089   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2090 
2091   // Xor packed integers
2092   void pxor(XMMRegister dst, XMMRegister src);
2093   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2094   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2095 
2096   // Copy low 128bit into high 128bit of YMM registers.
2097   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2098   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2099   void vextractf128h(XMMRegister dst, XMMRegister src);
2100   void vextracti128h(XMMRegister dst, XMMRegister src);
2101 
2102   // Load/store high 128bit of YMM registers without destroying the other half.
2103   void vinsertf128h(XMMRegister dst, Address src);
2104   void vinserti128h(XMMRegister dst, Address src);
2105   void vextractf128h(Address dst, XMMRegister src);
2106   void vextracti128h(Address dst, XMMRegister src);
2107 
2108   // Copy low 256bit into high 256bit of ZMM registers.
2109   void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2110   void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
2111   void vextracti64x4h(XMMRegister dst, XMMRegister src);
2112   void vextractf64x4h(XMMRegister dst, XMMRegister src);
2113   void vextractf64x4h(Address dst, XMMRegister src);
2114   void vinsertf64x4h(XMMRegister dst, Address src);
2115 
2116   // Copy targeted 128bit segments of the ZMM registers
2117   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
2118   void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
2119   void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
2120   void vextractf32x4h(Address dst, XMMRegister src, int value);
2121   void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
2122   void vinsertf32x4h(XMMRegister dst, Address src, int value);
2123 
2124   // duplicate 4-byte integer data from src into 8 locations in dest
2125   void vpbroadcastd(XMMRegister dst, XMMRegister src);
2126 
2127   // duplicate 2-byte integer data from src into 16 locations in dest
2128   void vpbroadcastw(XMMRegister dst, XMMRegister src);
2129 
2130   // duplicate n-byte integer data from src into vector_len locations in dest
2131   void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2132   void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
2133   void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2134   void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
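A small conceptual model (mine, not the emitted encoding) of the 4-byte broadcast described above, at 256-bit width, using plain uint32_t lanes:

  // What vpbroadcastd does to a 256-bit destination, lane by lane.
  void vpbroadcastd_model(uint32_t dst[8], const uint32_t src[8]) {
    for (int lane = 0; lane < 8; lane++) {
      dst[lane] = src[0];   // every 32-bit lane receives element 0 of src
    }
  }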


2153 
2154   // AVX instruction which is used to clear the upper 128 bits of the YMM registers and
2155   // to avoid the transition penalty between AVX and SSE states. There is no
2156   // penalty if legacy SSE instructions are encoded using the VEX prefix because
2157   // they always clear the upper 128 bits. It should be used before calling
2158   // runtime code and native libraries.
2159   void vzeroupper();
2160 
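As a usage illustration (a sketch under my own assumptions, not taken from this file): code that has dirtied the upper YMM halves would issue vzeroupper before transferring control to SSE-only runtime code. 'runtime_entry' is a hypothetical entry point; MacroAssembler::call(RuntimeAddress) is the usual HotSpot call helper.

  // Avoid the AVX->SSE transition penalty before leaving generated code.
  void call_out_sketch(MacroAssembler* masm, address runtime_entry) {
    masm->vzeroupper();                          // clear the upper 128 bits of all YMM regs
    masm->call(RuntimeAddress(runtime_entry));   // SSE-only callee sees a clean state
  }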
2161   // AVX support for vectorized conditional move (double). The following two instructions are used only as a pair.
2162   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2163   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2164 
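To show the coupling, a hedged sketch (the register choices, the comparison-predicate value 1, and which operand carries the mask are my assumptions) of a per-lane double-precision select built only from the two declarations above:

  // mask = compare(xmm1, xmm2) per 64-bit lane; result = blend of the two sources.
  void cmov_double_sketch(Assembler* masm) {
    const int vlen = Assembler::AVX_256bit;
    masm->cmppd(xmm0, xmm1, xmm2, 1, vlen);        // xmm0 <- per-lane all-ones/all-zeros mask
    masm->vpblendd(xmm3, xmm1, xmm2, xmm0, vlen);  // xmm3 <- lane-wise select under the mask
  }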
2165 
2166  protected:
2167   // The next instructions require 16-byte address alignment in SSE mode.
2168   // They should be called only from the corresponding MacroAssembler instructions.
2169   void andpd(XMMRegister dst, Address src);
2170   void andps(XMMRegister dst, Address src);
2171   void xorpd(XMMRegister dst, Address src);
2172   void xorps(XMMRegister dst, Address src);

2173 
2174 };
2175 
2176 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP


 421 
 422 // x86 can do array addressing as a single operation since disp can be an absolute
 423 // address; amd64 can't. We create a class that expresses the concept but does extra
 424 // magic on amd64 to get the final result.
 425 
 426 class ArrayAddress VALUE_OBJ_CLASS_SPEC {
 427   private:
 428 
 429   AddressLiteral _base;
 430   Address        _index;
 431 
 432   public:
 433 
 434   ArrayAddress() {};
 435   ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
 436   AddressLiteral base() { return _base; }
 437   Address index() { return _index; }
 438 
 439 };
 440 
 441 class InstructionAttr;
 442 
 443 // 64-bit reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes.
 444 // See the fxsave and xsave (EVEX enabled) documentation for the layout.
 445 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
 446 
 447 // The Intel x86/AMD64 Assembler: a pure assembler doing NO optimizations on the instruction
 448 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
 449 // is what you get. The Assembler generates code into a CodeBuffer.
 450 
 451 class Assembler : public AbstractAssembler  {
 452   friend class AbstractAssembler; // for the non-virtual hack
 453   friend class LIR_Assembler; // as_Address()
 454   friend class StubGenerator;
 455 
 456  public:
 457   enum Condition {                     // The x86 condition codes used for conditional jumps/moves.
 458     zero          = 0x4,
 459     notZero       = 0x5,
 460     equal         = 0x4,
 461     notEqual      = 0x5,
 462     less          = 0xc,


 553     EVEX_FV   = 0,
 554     EVEX_HV   = 4,
 555     EVEX_FVM  = 6,
 556     EVEX_T1S  = 7,
 557     EVEX_T1F  = 11,
 558     EVEX_T2   = 13,
 559     EVEX_T4   = 15,
 560     EVEX_T8   = 17,
 561     EVEX_HVM  = 18,
 562     EVEX_QVM  = 19,
 563     EVEX_OVM  = 20,
 564     EVEX_M128 = 21,
 565     EVEX_DUP  = 22,
 566     EVEX_ETUP = 23
 567   };
 568 
 569   enum EvexInputSizeInBits {
 570     EVEX_8bit  = 0,
 571     EVEX_16bit = 1,
 572     EVEX_32bit = 2,
 573     EVEX_64bit = 3,
 574     EVEX_NObit = 4
 575   };
 576 
 577   enum WhichOperand {
 578     // input to locate_operand, and format code for relocations
 579     imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
 580     disp32_operand = 1,          // embedded 32-bit displacement or address
 581     call32_operand = 2,          // embedded 32-bit self-relative displacement
 582 #ifndef _LP64
 583     _WhichOperand_limit = 3
 584 #else
 585      narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
 586     _WhichOperand_limit = 4
 587 #endif
 588   };
 589 
 590 
 591 
 592   // NOTE: The general philosophy of the declarations here is that 64bit versions
 593   // of instructions are freely declared without the need for wrapping them in an ifdef.
 594   // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
 595   // In the .cpp file the implementations are wrapped so that they are dropped out
 596   // of the resulting JVM. This is done mostly to keep the footprint of MINIMAL
 597   // to the size it was prior to merging up the 32bit and 64bit assemblers.
 598   //
 599   // This does mean you'll get a linker/runtime error if you use a 64bit-only instruction
 600   // in a 32bit VM. This is somewhat unfortunate but keeps the ifdef noise down.
 601 
 602 private:
 603 
 604   bool _legacy_mode_bw;
 605   bool _legacy_mode_dq;
 606   bool _legacy_mode_vl;
 607   bool _legacy_mode_vlbw;
 608 
 609   class InstructionAttr *_attributes;
 610 
 611   // 64bit prefixes
 612   int prefix_and_encode(int reg_enc, bool byteinst = false);
 613   int prefixq_and_encode(int reg_enc);
 614 
 615   int prefix_and_encode(int dst_enc, int src_enc) {
 616     return prefix_and_encode(dst_enc, false, src_enc, false);
 617   }
 618   int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
 619   int prefixq_and_encode(int dst_enc, int src_enc);
 620 
 621   void prefix(Register reg);
 622   void prefix(Register dst, Register src, Prefix p);
 623   void prefix(Register dst, Address adr, Prefix p);
 624   void prefix(Address adr);
 625   void prefixq(Address adr);
 626 
 627   void prefix(Address adr, Register reg,  bool byteinst = false);
 628   void prefix(Address adr, XMMRegister reg);
 629   void prefixq(Address adr, Register reg);
 630   void prefixq(Address adr, XMMRegister reg);
 631 
 632   void prefetch_prefix(Address src);
 633 
 634   void rex_prefix(Address adr, XMMRegister xreg,
 635                   VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 636   int  rex_prefix_and_encode(int dst_enc, int src_enc,
 637                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 638 
 639   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 640 
 641   void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
 642                    int nds_enc, VexSimdPrefix pre, VexOpcode opc);




 643 
 644   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
 645                   VexSimdPrefix pre, VexOpcode opc,
 646                   InstructionAttr *attributes);
 647 
 648   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
 649                              VexSimdPrefix pre, VexOpcode opc,
 650                              InstructionAttr *attributes);
 651 
 652   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
 653                    VexOpcode opc, InstructionAttr *attributes);
 654 
 655   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
 656                              VexOpcode opc, InstructionAttr *attributes);
 657 
 658   int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
 659                              VexOpcode opc, InstructionAttr *attributes);
 660 
 661   int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
 662                              VexOpcode opc, InstructionAttr *attributes);
 663 
 664   // Helper functions for groups of instructions
 665   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 666 
 667   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
 669   // Force generation of a 4-byte immediate value even if it fits into 8 bits
 669   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
 670   void emit_arith(int op1, int op2, Register dst, Register src);
 671 
 672   bool emit_compressed_disp_byte(int &disp);
 673 
 674   void emit_operand(Register reg,
 675                     Register base, Register index, Address::ScaleFactor scale,
 676                     int disp,
 677                     RelocationHolder const& rspec,
 678                     int rip_relative_correction = 0);
 679 
 680   void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
 681 
 682   // operands that only take the original 32bit registers
 683   void emit_operand32(Register reg, Address adr);
 684 
 685   void emit_operand(XMMRegister reg,
 686                     Register base, Register index, Address::ScaleFactor scale,
 687                     int disp,
 688                     RelocationHolder const& rspec);
 689 
 690   void emit_operand(XMMRegister reg, Address adr);
 691 


 796 
 797   // Creation
 798   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 799     init_attributes();
 800   }
 801 
 802   // Decoding
 803   static address locate_operand(address inst, WhichOperand which);
 804   static address locate_next_instruction(address inst);
 805 
 806   // Utilities
 807   static bool is_polling_page_far() NOT_LP64({ return false;});
 808   static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 809                                          int cur_tuple_type, int in_size_in_bits, int cur_encoding);
 810 
 811   // Generic instructions
 812   // Does 32bit or 64bit as needed for the platform. In some sense these
 813   // belong in the macro assembler, but there is no need for both varieties to exist.
 814 
 815   void init_attributes(void) {
 816     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
 817     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
 818     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
 819     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
 820     _attributes = NULL;
 821   }
 822 
 823   void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
 824   void clear_attributes(void) { _attributes = NULL; }
 825 
 826   void lea(Register dst, Address src);
 827 
 828   void mov(Register dst, Register src);
 829 
 830   void pusha();
 831   void popa();
 832 
 833   void pushf();
 834   void popf();
 835 
 836   void push(int32_t imm32);
 837 
 838   void push(Register src);
 839 
 840   void pop(Register dst);
 841 
 842   // These are dummies to prevent surprise implicit conversions to Register
 843   void push(void* v);
 844   void pop(void* v);
 845 


1914   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1915 
1916   // Xor packed integers
1917   void pxor(XMMRegister dst, XMMRegister src);
1918   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1919   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1920 
1921   // Copy low 128bit into high 128bit of YMM registers.
1922   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1923   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1924   void vextractf128h(XMMRegister dst, XMMRegister src);
1925   void vextracti128h(XMMRegister dst, XMMRegister src);
1926 
1927   // Load/store high 128bit of YMM registers without destroying the other half.
1928   void vinsertf128h(XMMRegister dst, Address src);
1929   void vinserti128h(XMMRegister dst, Address src);
1930   void vextractf128h(Address dst, XMMRegister src);
1931   void vextracti128h(Address dst, XMMRegister src);
1932 
1933   // Copy low 256bit into high 256bit of ZMM registers.
1934   void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
1935   void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
1936   void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
1937   void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
1938   void vextractf64x4h(Address dst, XMMRegister src, int value);
1939   void vinsertf64x4h(XMMRegister dst, Address src, int value);
1940 
1941   // Copy targeted 128bit segments of the ZMM registers
1942   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
1943   void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
1944   void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
1945   void vextractf32x4h(Address dst, XMMRegister src, int value);
1946   void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
1947   void vinsertf32x4h(XMMRegister dst, Address src, int value);
1948 
1949   // duplicate 4-byte integer data from src into 8 locations in dest
1950   void vpbroadcastd(XMMRegister dst, XMMRegister src);
1951 
1952   // duplicate 2-byte integer data from src into 16 locations in dest
1953   void vpbroadcastw(XMMRegister dst, XMMRegister src);
1954 
1955   // duplicate n-byte integer data from src into vector_len locations in dest
1956   void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
1957   void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
1958   void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
1959   void evpbroadcastw(XMMRegister dst, Address src, int vector_len);


1978 
1979   // AVX instruction which is used to clear the upper 128 bits of the YMM registers and
1980   // to avoid the transition penalty between AVX and SSE states. There is no
1981   // penalty if legacy SSE instructions are encoded using the VEX prefix because
1982   // they always clear the upper 128 bits. It should be used before calling
1983   // runtime code and native libraries.
1984   void vzeroupper();
1985 
1986   // AVX support for vectorized conditional move (double). The following two instructions are used only as a pair.
1987   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
1988   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
1989 
1990 
1991  protected:
1992   // The next instructions require 16-byte address alignment in SSE mode.
1993   // They should be called only from the corresponding MacroAssembler instructions.
1994   void andpd(XMMRegister dst, Address src);
1995   void andps(XMMRegister dst, Address src);
1996   void xorpd(XMMRegister dst, Address src);
1997   void xorps(XMMRegister dst, Address src);
1998 
1999 };
2000 
2001 // The Intel x86/AMD64 Assembler attributes: all fields enclosed here guide encoding-level decisions.
2002 // Specific set functions are for specialized use; otherwise the defaults, or whatever was supplied at
2003 // object construction, are applied.
2004 class InstructionAttr {
2005 public:
2006   InstructionAttr(
2007     int vector_len,
2008     bool rex_vex_w,
2009     bool legacy_mode,
2010     bool no_reg_mask,
2011     bool uses_vl)
2012     :
2013       _avx_vector_len(vector_len),
2014       _rex_vex_w(rex_vex_w),
2015       _legacy_mode(legacy_mode),
2016       _no_reg_mask(no_reg_mask),
2017       _uses_vl(uses_vl),
2018       _tuple_type(Assembler::EVEX_ETUP),
2019       _input_size_in_bits(Assembler::EVEX_NObit),
2020       _is_evex_instruction(false),
2021       _evex_encoding(0),
2022       _is_clear_context(false),
2023       _is_extended_context(false),
2024       _current_assembler(NULL) {
2025     if (UseAVX < 3) _legacy_mode = true;
2026   }
2027 
2028   ~InstructionAttr() {
2029     if (_current_assembler != NULL) {
2030       _current_assembler->clear_attributes();
2031     }
2032     _current_assembler = NULL;
2033   }
2034 
2035 private:
2036   int  _avx_vector_len;
2037   bool _rex_vex_w;
2038   bool _legacy_mode;
2039   bool _no_reg_mask;
2040   bool _uses_vl;
2041   int  _tuple_type;
2042   int  _input_size_in_bits;
2043   bool _is_evex_instruction;
2044   int  _evex_encoding;
2045   bool _is_clear_context;
2046   bool _is_extended_context;
2047 
2048   Assembler *_current_assembler;
2049 
2050 public:
2051   // query functions for field accessors
2052   int  get_vector_len(void) const { return _avx_vector_len; }
2053   bool is_rex_vex_w(void) const { return _rex_vex_w; }
2054   bool is_legacy_mode(void) const { return _legacy_mode; }
2055   bool is_no_reg_mask(void) const { return _no_reg_mask; }
2056   bool uses_vl(void) const { return _uses_vl; }
2057   int  get_tuple_type(void) const { return _tuple_type; }
2058   int  get_input_size(void) const { return _input_size_in_bits; }
2059   bool is_evex_instruction(void) const { return _is_evex_instruction; }
2060   int  get_evex_encoding(void) const { return _evex_encoding; }
2061   bool is_clear_context(void) const { return _is_clear_context; }
2062   bool is_extended_context(void) const { return _is_extended_context; }
2063 
2064   // Set the vector len manually
2065   void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2066 
2067   // Set the instruction to be encoded in AVX mode
2068   void set_is_legacy_mode(void) { _legacy_mode = true; }
2069 
2070   // Set the current instruction to be encoded as an EVEX instruction
2071   void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2072 
2073   // Internal encoding data used in compressed immediate offset programming
2074   void set_evex_encoding(int value) { _evex_encoding = value; }
2075 
2076   // Set the EVEX.Z field to be used to clear all non-directed XMM/YMM/ZMM components
2077   void set_is_clear_context(void) { _is_clear_context = true; }
2078 
2079   // Map back to the current assembler so that we can manage object-level association
2080   void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2081 
2082   // Address modifiers used for compressed displacement calculation
2083   void set_address_attributes(int tuple_type, int input_size_in_bits) {
2084     if (VM_Version::supports_evex()) {
2085       _tuple_type = tuple_type;
2086       _input_size_in_bits = input_size_in_bits;
2087     }
2088   }
2089 
2090 };
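For orientation, a minimal sketch (mine, mirroring only the constructor and setters declared above) of how an emitter inside Assembler might use an InstructionAttr. The member name is hypothetical and would need a matching private declaration; the chosen flag values are illustrative.

  // Sketch: a 128-bit, VEX.W=0 operation with no write-masking.
  void Assembler::example_emit_sketch(XMMRegister dst, XMMRegister nds, Address src) {
    InstructionAttr attributes(AVX_128bit, /* rex_vex_w */ false, /* legacy_mode */ false,
                               /* no_reg_mask */ false, /* uses_vl */ true);
    attributes.set_address_attributes(EVEX_T1S, EVEX_32bit);  // feeds compressed disp8 handling
    attributes.set_current_assembler(this);                   // ~InstructionAttr() clears it again
    vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
    // opcode and ModRM/operand bytes would be emitted after the prefix
  }

Because the destructor calls clear_attributes() on the registered assembler, the attribute object's scope bounds the association set up by set_current_assembler().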
2091 
2092 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP