1 /*
2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
2701 __ aesdec(xmm_result, as_XMMRegister(rnum));
2702 }
2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705 }
2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2707 __ aesdeclast(xmm_result, xmm_temp);
2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2711 // no need to store r to memory until we exit
2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2713 __ addptr(pos, AESBlockSize);
2714 __ subptr(len_reg, AESBlockSize);
2715 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
2716 __ jmp(L_exit);
2717
2718 return start;
2719 }
2720
2721 /**
2722 * Arguments:
2723 *
2724 * Inputs:
2725 * rsp(4) - int crc
2726 * rsp(8) - byte* buf
2727 * rsp(12) - int length
2728 *
2729 * Output:
2730 * rax - int crc result
2731 */
2732 address generate_updateBytesCRC32() {
2733 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2734
2735 __ align(CodeEntryAlignment);
2736 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2737
2738 address start = __ pc();
2739
2740 const Register crc = rdx; // crc
2998
2999 //------------------------------------------------------------------------------------------------------------------------
3000 // entry points that are platform specific
3001
3002 // support for verify_oop (must happen after universe_init)
3003 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3004
3005 // arraycopy stubs used by compilers
3006 generate_arraycopy_stubs();
3007
3008 generate_math_stubs();
3009
3010 // don't bother generating these AES intrinsic stubs unless global flag is set
3011 if (UseAESIntrinsics) {
3012 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
3013
3014 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3015 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3016 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3017 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3018 }
3019
3020 // Safefetch stubs.
3021 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3022 &StubRoutines::_safefetch32_fault_pc,
3023 &StubRoutines::_safefetch32_continuation_pc);
3024 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
3025 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
3026 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3027 }
3028
3029
3030 public:
3031 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3032 if (all) {
3033 generate_all();
3034 } else {
3035 generate_initial();
3036 }
3037 }
|
1 /*
2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
2701 __ aesdec(xmm_result, as_XMMRegister(rnum));
2702 }
2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705 }
2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2707 __ aesdeclast(xmm_result, xmm_temp);
2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2711 // no need to store r to memory until we exit
2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2713 __ addptr(pos, AESBlockSize);
2714 __ subptr(len_reg, AESBlockSize);
2715 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
2716 __ jmp(L_exit);
2717
2718 return start;
2719 }
2720
2721 // byte swap x86 long
2722 address generate_ghash_long_swap_mask() {
2723 __ align(CodeEntryAlignment);
2724 StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
2725 address start = __ pc();
2726 __ emit_data(0x0b0a0908, relocInfo::none, 0);
2727 __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
2728 __ emit_data(0x03020100, relocInfo::none, 0);
2729 __ emit_data(0x07060504, relocInfo::none, 0);
2730
2731 return start;
2732 }
2733
2734 // byte swap x86 byte array
2735 address generate_ghash_byte_swap_mask() {
2736 __ align(CodeEntryAlignment);
2737 StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
2738 address start = __ pc();
2739 __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
2740 __ emit_data(0x08090a0b, relocInfo::none, 0);
2741 __ emit_data(0x04050607, relocInfo::none, 0);
2742 __ emit_data(0x00010203, relocInfo::none, 0);
2743 return start;
2744 }
2745
2746 /* Single and multi-block ghash operations */
2747 address generate_ghash_processBlocks() {
2748 assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
2749 __ align(CodeEntryAlignment);
2750 Label L_ghash_loop, L_exit;
2751 StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
2752 address start = __ pc();
2753
2754 const Register state = rdi;
2755 const Register subkeyH = rsi;
2756 const Register data = rdx;
2757 const Register blocks = rcx;
2758
2759 const Address state_param(rbp, 8+0);
2760 const Address subkeyH_param(rbp, 8+4);
2761 const Address data_param(rbp, 8+8);
2762 const Address blocks_param(rbp, 8+12);
2763
2764 #ifdef _WIN64
2765 const int XMM_REG_LAST = 7;
2766 #endif
2767
2768 const XMMRegister xmm_temp0 = xmm0;
2769 const XMMRegister xmm_temp1 = xmm1;
2770 const XMMRegister xmm_temp2 = xmm2;
2771 const XMMRegister xmm_temp3 = xmm3;
2772 const XMMRegister xmm_temp4 = xmm4;
2773 const XMMRegister xmm_temp5 = xmm5;
2774 const XMMRegister xmm_temp6 = xmm6;
2775 const XMMRegister xmm_temp7 = xmm7;
2776
2777 __ enter();
2778
2779 #ifdef _WIN64
2780 // save the xmm registers which must be preserved 6-10
2781 __ subptr(rsp, -rsp_after_call_off * wordSize);
2782 for (int i = 6; i <= XMM_REG_LAST; i++) {
2783 __ movdqu(xmm_save(i), as_XMMRegister(i));
2784 }
2785 #endif
2786 __ movptr(state, state_param);
2787 __ movptr(subkeyH, subkeyH_param);
2788 __ movptr(data, data_param);
2789 __ movptr(blocks, blocks_param);
2790
2791 __ movdqu(xmm_temp0, Address(state, 0));
2792 __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2793
2794 __ movdqu(xmm_temp1, Address(subkeyH, 0));
2795 __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2796
2797 __ BIND(L_ghash_loop);
2798 __ movdqu(xmm_temp2, Address(data, 0));
2799 __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
2800
2801 __ pxor(xmm_temp0, xmm_temp2);
2802
2803 //
2804 // Multiply with the hash key
2805 //
2806 __ movdqu(xmm_temp3, xmm_temp0);
2807 __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
2808 __ movdqu(xmm_temp4, xmm_temp0);
2809 __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
2810
2811 __ movdqu(xmm_temp5, xmm_temp0);
2812 __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
2813 __ movdqu(xmm_temp6, xmm_temp0);
2814 __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
2815
2816 __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
2817
2818 __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
2819 __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right
2820 __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left
2821 __ pxor(xmm_temp3, xmm_temp5);
2822 __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
2823 // of the carry-less multiplication of
2824 // xmm0 by xmm1.
2825
2826 // We shift the result of the multiplication by one bit position
2827 // to the left to cope for the fact that the bits are reversed.
2828 __ movdqu(xmm_temp7, xmm_temp3);
2829 __ movdqu(xmm_temp4, xmm_temp6);
2830 __ pslld (xmm_temp3, 1);
2831 __ pslld(xmm_temp6, 1);
2832 __ psrld(xmm_temp7, 31);
2833 __ psrld(xmm_temp4, 31);
2834 __ movdqu(xmm_temp5, xmm_temp7);
2835 __ pslldq(xmm_temp4, 4);
2836 __ pslldq(xmm_temp7, 4);
2837 __ psrldq(xmm_temp5, 12);
2838 __ por(xmm_temp3, xmm_temp7);
2839 __ por(xmm_temp6, xmm_temp4);
2840 __ por(xmm_temp6, xmm_temp5);
2841
2842 //
2843 // First phase of the reduction
2844 //
2845 // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
2846 // independently.
2847 __ movdqu(xmm_temp7, xmm_temp3);
2848 __ movdqu(xmm_temp4, xmm_temp3);
2849 __ movdqu(xmm_temp5, xmm_temp3);
2850 __ pslld(xmm_temp7, 31); // packed right shift shifting << 31
2851 __ pslld(xmm_temp4, 30); // packed right shift shifting << 30
2852 __ pslld(xmm_temp5, 25); // packed right shift shifting << 25
2853 __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions
2854 __ pxor(xmm_temp7, xmm_temp5);
2855 __ movdqu(xmm_temp4, xmm_temp7);
2856 __ pslldq(xmm_temp7, 12);
2857 __ psrldq(xmm_temp4, 4);
2858 __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
2859
2860 //
2861 // Second phase of the reduction
2862 //
2863 // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
2864 // shift operations.
2865 __ movdqu(xmm_temp2, xmm_temp3);
2866 __ movdqu(xmm_temp7, xmm_temp3);
2867 __ movdqu(xmm_temp5, xmm_temp3);
2868 __ psrld(xmm_temp2, 1); // packed left shifting >> 1
2869 __ psrld(xmm_temp7, 2); // packed left shifting >> 2
2870 __ psrld(xmm_temp5, 7); // packed left shifting >> 7
2871 __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions
2872 __ pxor(xmm_temp2, xmm_temp5);
2873 __ pxor(xmm_temp2, xmm_temp4);
2874 __ pxor(xmm_temp3, xmm_temp2);
2875 __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
2876
2877 __ decrement(blocks);
2878 __ jcc(Assembler::zero, L_exit);
2879 __ movdqu(xmm_temp0, xmm_temp6);
2880 __ addptr(data, 16);
2881 __ jmp(L_ghash_loop);
2882
2883 __ BIND(L_exit);
2884 // Byte swap 16-byte result
2885 __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2886 __ movdqu(Address(state, 0), xmm_temp6); // store the result
2887
2888 #ifdef _WIN64
2889 // restore xmm regs belonging to calling function
2890 for (int i = 6; i <= XMM_REG_LAST; i++) {
2891 __ movdqu(as_XMMRegister(i), xmm_save(i));
2892 }
2893 #endif
2894 __ leave();
2895 __ ret(0);
2896 return start;
2897 }
2898
2899 /**
2900 * Arguments:
2901 *
2902 * Inputs:
2903 * rsp(4) - int crc
2904 * rsp(8) - byte* buf
2905 * rsp(12) - int length
2906 *
2907 * Output:
2908 * rax - int crc result
2909 */
2910 address generate_updateBytesCRC32() {
2911 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2912
2913 __ align(CodeEntryAlignment);
2914 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2915
2916 address start = __ pc();
2917
2918 const Register crc = rdx; // crc
3176
3177 //------------------------------------------------------------------------------------------------------------------------
3178 // entry points that are platform specific
3179
3180 // support for verify_oop (must happen after universe_init)
3181 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3182
3183 // arraycopy stubs used by compilers
3184 generate_arraycopy_stubs();
3185
3186 generate_math_stubs();
3187
3188 // don't bother generating these AES intrinsic stubs unless global flag is set
3189 if (UseAESIntrinsics) {
3190 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
3191
3192 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3193 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3194 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3195 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3196 }
3197
3198 // Generate GHASH intrinsics code
3199 if (UseGHASHIntrinsics) {
3200 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
3201 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
3202 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
3203 }
3204
3205 // Safefetch stubs.
3206 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3207 &StubRoutines::_safefetch32_fault_pc,
3208 &StubRoutines::_safefetch32_continuation_pc);
3209 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
3210 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
3211 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3212 }
3213
3214
3215 public:
3216 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3217 if (all) {
3218 generate_all();
3219 } else {
3220 generate_initial();
3221 }
3222 }
|