2710 __ aesdec(xmm_result, as_XMMRegister(rnum)); 2711 } 2712 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 2713 aes_dec_key(xmm_result, xmm_temp, key, key_offset); 2714 } 2715 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 2716 __ aesdeclast(xmm_result, xmm_temp); 2717 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2718 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2719 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 2720 // no need to store r to memory until we exit 2721 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr 2722 __ addptr(pos, AESBlockSize); 2723 __ subptr(len_reg, AESBlockSize); 2724 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); 2725 __ jmp(L_exit); 2726 2727 return start; 2728 } 2729 2730 /** 2731 * Arguments: 2732 * 2733 * Inputs: 2734 * rsp(4) - int crc 2735 * rsp(8) - byte* buf 2736 * rsp(12) - int length 2737 * 2738 * Ouput: 2739 * rax - int crc result 2740 */ 2741 address generate_updateBytesCRC32() { 2742 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); 2743 2744 __ align(CodeEntryAlignment); 2745 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); 2746 2747 address start = __ pc(); 2748 2749 const Register crc = rdx; // crc 3007 3008 //------------------------------------------------------------------------------------------------------------------------ 3009 // entry points that are platform specific 3010 3011 // support for verify_oop (must happen after universe_init) 3012 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); 3013 3014 // arraycopy stubs used by compilers 3015 generate_arraycopy_stubs(); 3016 3017 generate_math_stubs(); 3018 3019 // don't bother generating these AES intrinsic stubs unless global flag is set 3020 if (UseAESIntrinsics) 
{ 3021 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others 3022 3023 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); 3024 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); 3025 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); 3026 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); 3027 } 3028 3029 // Safefetch stubs. 3030 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 3031 &StubRoutines::_safefetch32_fault_pc, 3032 &StubRoutines::_safefetch32_continuation_pc); 3033 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; 3034 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; 3035 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; 3036 } 3037 3038 3039 public: 3040 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 3041 if (all) { 3042 generate_all(); 3043 } else { 3044 generate_initial(); 3045 } 3046 } | 2710 __ aesdec(xmm_result, as_XMMRegister(rnum)); 2711 } 2712 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 2713 aes_dec_key(xmm_result, xmm_temp, key, key_offset); 2714 } 2715 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 2716 __ aesdeclast(xmm_result, xmm_temp); 2717 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2718 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2719 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 2720 // no need to store r to memory until we exit 2721 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr 2722 __ addptr(pos, AESBlockSize); 2723 __ subptr(len_reg, AESBlockSize); 
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
    __ jmp(L_exit);

    return start;
  }

  // byte swap x86 long
  // Emits a 16-byte pshufb control mask that swaps the two 64-bit halves of
  // an XMM register while leaving the byte order inside each half unchanged
  // (selects source bytes 8..15 into the low half, then 0..7 into the high half).
  address generate_ghash_long_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    address start = __ pc();
    __ emit_data(0x0b0a0908, relocInfo::none, 0);
    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
    __ emit_data(0x03020100, relocInfo::none, 0);
    __ emit_data(0x07060504, relocInfo::none, 0);

    return start;
  }

  // byte swap x86 byte array
  // Emits a 16-byte pshufb control mask that reverses all 16 bytes of an XMM
  // register (selects source bytes 15..0).
  address generate_ghash_byte_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    address start = __ pc();
    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
    __ emit_data(0x08090a0b, relocInfo::none, 0);
    __ emit_data(0x04050607, relocInfo::none, 0);
    __ emit_data(0x00010203, relocInfo::none, 0);
    return start;
  }

  /* Single and multi-block ghash operations */
  // Stub entry; arguments are read from the caller's stack (rbp-relative
  // after enter()): state, subkeyH, data, blocks.
  // For each 16-byte block of 'data': xors the block into the running state,
  // then multiplies by the hash subkey H using four pclmulqdq partial
  // products and a two-phase reduction, storing the final digest back into
  // 'state'.  NOTE(review): presumably this implements AES-GCM's GHASH in
  // GF(2^128) — confirm against the Java-side caller.
  address generate_ghash_processBlocks() {
    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state   = rdi;
    const Register subkeyH = rsi;
    const Register data    = rdx;
    const Register blocks  = rcx;

    // Incoming stack arguments: after enter() (push rbp; mov rbp,rsp) the
    // first argument lives at rbp+8 (saved rbp + return address below it).
    const Address  state_param(rbp, 8+0);
    const Address  subkeyH_param(rbp, 8+4);
    const Address  data_param(rbp, 8+8);
    const Address  blocks_param(rbp, 8+12);

    const XMMRegister xmm_temp0 = xmm0;
    const XMMRegister xmm_temp1 = xmm1;
    const XMMRegister xmm_temp2 = xmm2;
    const XMMRegister xmm_temp3 = xmm3;
    const XMMRegister xmm_temp4 = xmm4;
    const XMMRegister xmm_temp5 = xmm5;
    const XMMRegister xmm_temp6 = xmm6;
    const XMMRegister xmm_temp7 = xmm7;

    __ enter();

    // Load the four arguments from the caller's stack.
    __ movptr(state, state_param);
    __ movptr(subkeyH, subkeyH_param);
    __ movptr(data, data_param);
    __ movptr(blocks, blocks_param);

    // Load state and subkey H, swapping their 64-bit halves into the
    // operand order expected by the carry-less multiply below.
    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ BIND(L_ghash_loop);
    // Load the next 16-byte data block, fully byte-reversed, and fold it
    // into the running state.
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    __ pxor(xmm_temp0, xmm_temp2);

    //
    // Multiply with the hash key: 128x128-bit carry-less multiplication
    // built from four 64x64-bit pclmulqdq partial products (schoolbook).
    //
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0

    // Split the middle term across the low and high halves of the result.
    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
    __ psrldq(xmm_temp4, 8);            // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);            // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
                                        // of the carry-less multiplication of
                                        // xmm0 by xmm1.

    // We shift the result of the multiplication by one bit position
    // to the left to cope for the fact that the bits are reversed.
    // There is no 256-bit shift, so dword shifts are combined with the
    // carry bits captured from each dword via psrld/pslldq.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp6);
    __ pslld(xmm_temp3, 1);
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);            // bits shifted out of each low-half dword
    __ psrld(xmm_temp4, 31);            // bits shifted out of each high-half dword
    __ movdqu(xmm_temp5, xmm_temp7);
    __ pslldq(xmm_temp4, 4);            // move carries up into the next dword
    __ pslldq(xmm_temp7, 4);
    __ psrldq(xmm_temp5, 12);           // carry out of the low 128 bits ...
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp4);
    __ por(xmm_temp6, xmm_temp5);       // ... lands in the high half

    //
    // First phase of the reduction
    //
    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
    // independently.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ pslld(xmm_temp7, 31);            // packed left shift of each dword by 31
    __ pslld(xmm_temp4, 30);            // packed left shift of each dword by 30
    __ pslld(xmm_temp5, 25);            // packed left shift of each dword by 25
    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp5);
    __ movdqu(xmm_temp4, xmm_temp7);    // xmm4 keeps the top dword for phase two
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp4, 4);
    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
    // shift operations.
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);             // packed right shift of each dword by 1
    __ psrld(xmm_temp7, 2);             // packed right shift of each dword by 2
    __ psrld(xmm_temp5, 7);             // packed right shift of each dword by 7
    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp4);      // fold in the dword saved in phase one
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6

    // Loop control: next block, or fall through to the exit.
    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);    // result becomes the state for the next block
    __ addptr(data, 16);
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    // Byte swap 16-byte result back to the caller's representation.
    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    __ movdqu(Address(state, 0), xmm_temp6);   // store the result

    __ leave();
    __ ret(0);
    return start;
  }

  /**
   * Arguments:
   *
   * Inputs:
   *   rsp(4)   - int crc
   *   rsp(8)   - byte* buf
   *   rsp(12)  - int length
   *
   * Output:
   *   rax   - int crc result
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();

    const Register crc = rdx;  // crc

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    generate_math_stubs();

    // don't bother generating these AES intrinsic stubs unless global flag is set
    if
(UseAESIntrinsics) {
      // The shuffle mask may be referenced by the other AES stubs, so it is
      // generated first.
      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();   // might be needed by the others

      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
    }

    // Generate GHASH intrinsics code.
    // The two pshufb masks are generated first because
    // generate_ghash_processBlocks reads them through
    // StubRoutines::x86::ghash_*_swap_mask_addr().
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                   &StubRoutines::_safefetch32_fault_pc,
                                                   &StubRoutines::_safefetch32_continuation_pc);
    // SafeFetchN is aliased to the 32-bit stub here — NOTE(review): this
    // looks like the 32-bit port (intptr_t-sized == int-sized fetches);
    // confirm against the platform this file targets.
    StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
    StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
    StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
  }


 public:
  // Constructor: generates stubs into 'code'. 'all' selects the full stub
  // set (generate_all) versus the initial bootstrap subset (generate_initial).
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }