1 /*
2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
2701 __ aesdec(xmm_result, as_XMMRegister(rnum));
2702 }
2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705 }
2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2707 __ aesdeclast(xmm_result, xmm_temp);
2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2711 // no need to store r to memory until we exit
2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2713 __ addptr(pos, AESBlockSize);
2714 __ subptr(len_reg, AESBlockSize);
2715 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
2716 __ jmp(L_exit);
2717
2718 return start;
2719 }
2720
2721 /**
2722 * Arguments:
2723 *
2724 * Inputs:
2725 * rsp(4) - int crc
2726 * rsp(8) - byte* buf
2727 * rsp(12) - int length
2728 *
2729  * Output:
2730 * rax - int crc result
2731 */
2732 address generate_updateBytesCRC32() {
2733 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2734
2735 __ align(CodeEntryAlignment);
2736 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2737
2738 address start = __ pc();
2739
2740 const Register crc = rdx; // crc
2998
2999 //------------------------------------------------------------------------------------------------------------------------
3000 // entry points that are platform specific
3001
3002 // support for verify_oop (must happen after universe_init)
3003 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3004
3005 // arraycopy stubs used by compilers
3006 generate_arraycopy_stubs();
3007
3008 generate_math_stubs();
3009
3010 // don't bother generating these AES intrinsic stubs unless global flag is set
3011 if (UseAESIntrinsics) {
3012 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
3013
3014 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3015 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3016 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3017 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3018 }
3019
3020 // Safefetch stubs.
3021 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3022 &StubRoutines::_safefetch32_fault_pc,
3023 &StubRoutines::_safefetch32_continuation_pc);
3024 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
3025 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
3026 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3027 }
3028
3029
3030 public:
3031 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3032 if (all) {
3033 generate_all();
3034 } else {
3035 generate_initial();
3036 }
3037 }
|
1 /*
2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
2701 __ aesdec(xmm_result, as_XMMRegister(rnum));
2702 }
2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705 }
2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2707 __ aesdeclast(xmm_result, xmm_temp);
2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2711 // no need to store r to memory until we exit
2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2713 __ addptr(pos, AESBlockSize);
2714 __ subptr(len_reg, AESBlockSize);
2715 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
2716 __ jmp(L_exit);
2717
2718 return start;
2719 }
2720
2721 // byte swap x86 long
2722 address generate_ghash_long_swap_mask() {
2723 __ align(CodeEntryAlignment);
2724 StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
2725 address start = __ pc();
2726 __ emit_data(0x0b0a0908, relocInfo::none, 0);
2727 __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
2728 __ emit_data(0x03020100, relocInfo::none, 0);
2729 __ emit_data(0x07060504, relocInfo::none, 0);
2730
2731 return start;
2732 }
2733
2734 // byte swap x86 byte array
2735 address generate_ghash_byte_swap_mask() {
2736 __ align(CodeEntryAlignment);
2737 StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
2738 address start = __ pc();
2739 __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
2740 __ emit_data(0x08090a0b, relocInfo::none, 0);
2741 __ emit_data(0x04050607, relocInfo::none, 0);
2742 __ emit_data(0x00010203, relocInfo::none, 0);
2743 return start;
2744 }
2745
  // Single and multi-block GHASH operations (carry-less GF(2^128) multiply
  // via PCLMULQDQ, as in Intel's CLMUL GCM white paper).
  //
  // 32-bit stack arguments (addressed off rbp after enter()):
  //   rbp+8  - byte* state    128-bit hash state (read, then updated in place)
  //   rbp+12 - byte* subkeyH  128-bit hash subkey H
  //   rbp+16 - byte* data     input blocks, 16 bytes each
  //   rbp+20 - int   blocks   block count; the loop decrements before testing,
  //                           so callers must pass blocks >= 1
  address generate_ghash_processBlocks() {
    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state        = rdi;
    const Register subkeyH      = rsi;
    const Register data         = rdx;
    const Register blocks       = rcx;

    const Address  state_param(rbp, 8+0);
    const Address  subkeyH_param(rbp, 8+4);
    const Address  data_param(rbp, 8+8);
    const Address  blocks_param(rbp, 8+12);

    const XMMRegister xmm_temp0 = xmm0;
    const XMMRegister xmm_temp1 = xmm1;
    const XMMRegister xmm_temp2 = xmm2;
    const XMMRegister xmm_temp3 = xmm3;
    const XMMRegister xmm_temp4 = xmm4;
    const XMMRegister xmm_temp5 = xmm5;
    const XMMRegister xmm_temp6 = xmm6;
    const XMMRegister xmm_temp7 = xmm7;

    __ enter();

    // Load the four stack parameters into registers.
    __ movptr(state, state_param);
    __ movptr(subkeyH, subkeyH_param);
    __ movptr(data, data_param);
    __ movptr(blocks, blocks_param);

    // Bring state and subkey H into the bit ordering the CLMUL math expects:
    // swap the two 64-bit halves byte-wise.
    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ BIND(L_ghash_loop);
    // Load the next 16-byte input block and reverse all 16 bytes.
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    // state ^= block (GHASH absorbs the block before multiplying by H).
    __ pxor(xmm_temp0, xmm_temp2);

    //
    // Multiply with the hash key: schoolbook 64x64 carry-less multiply,
    // four PCLMULQDQ partial products of xmm0 (a1:a0) by xmm1 (b1:b0).
    //
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1

    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0

    // Split the middle term across the low (xmm3) and high (xmm6) halves.
    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
    __ psrldq(xmm_temp4, 8);            // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);            // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
                                        // of the carry-less multiplication of
                                        // xmm0 by xmm1.

    // We shift the 256-bit result of the multiplication left by one bit
    // position to compensate for the bit-reversed representation: each
    // half is shifted left by 1, and the bit shifted out of each 32-bit
    // lane (captured with psrld 31) is carried into the next lane/half.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp6);
    __ pslld (xmm_temp3, 1);
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);
    __ psrld(xmm_temp4, 31);
    __ movdqu(xmm_temp5, xmm_temp7);
    __ pslldq(xmm_temp4, 4);
    __ pslldq(xmm_temp7, 4);
    __ psrldq(xmm_temp5, 12);           // xmm5 = carry out of the low half into the high half
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp4);
    __ por(xmm_temp6, xmm_temp5);

    //
    // First phase of the reduction (modulo the GHASH polynomial
    // x^128 + x^7 + x^2 + x + 1, in bit-reflected form)
    //
    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
    // independently.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ pslld(xmm_temp7, 31);    // packed left shift, << 31
    __ pslld(xmm_temp4, 30);    // packed left shift, << 30
    __ pslld(xmm_temp5, 25);    // packed left shift, << 25
    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp5);
    __ movdqu(xmm_temp4, xmm_temp7);
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp4, 4);            // xmm4 kept for the second phase below
    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
    // shift operations.
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);     // packed right shift, >> 1
    __ psrld(xmm_temp7, 2);     // packed right shift, >> 2
    __ psrld(xmm_temp5, 7);     // packed right shift, >> 7
    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp4);      // fold in the saved term from phase one
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6

    // Loop control: fall out when all blocks consumed, otherwise feed the
    // result back in as the next state and advance to the next block.
    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);
    __ addptr(data, 16);
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    // Byte swap 16-byte result back to memory byte order.
    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    __ movdqu(Address(state, 0), xmm_temp6);    // store the result

    __ leave();
    __ ret(0);
    return start;
  }
2881
2882 /**
2883 * Arguments:
2884 *
2885 * Inputs:
2886 * rsp(4) - int crc
2887 * rsp(8) - byte* buf
2888 * rsp(12) - int length
2889 *
2890  * Output:
2891 * rax - int crc result
2892 */
2893 address generate_updateBytesCRC32() {
2894 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2895
2896 __ align(CodeEntryAlignment);
2897 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2898
2899 address start = __ pc();
2900
2901 const Register crc = rdx; // crc
3159
3160 //------------------------------------------------------------------------------------------------------------------------
3161 // entry points that are platform specific
3162
3163 // support for verify_oop (must happen after universe_init)
3164 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3165
3166 // arraycopy stubs used by compilers
3167 generate_arraycopy_stubs();
3168
3169 generate_math_stubs();
3170
3171 // don't bother generating these AES intrinsic stubs unless global flag is set
3172 if (UseAESIntrinsics) {
3173 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
3174
3175 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3176 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3177 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3178 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3179 }
3180
3181 // Generate GHASH intrinsics code
3182 if (UseGHASHIntrinsics) {
3183 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
3184 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
3185 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
3186 }
3187
3188 // Safefetch stubs.
3189 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3190 &StubRoutines::_safefetch32_fault_pc,
3191 &StubRoutines::_safefetch32_continuation_pc);
3192 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
3193 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
3194 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3195 }
3196
3197
3198 public:
  // Construct a stub generator over 'code' and emit stubs immediately:
  // the full stub set when 'all' is true, otherwise only the initial
  // subset needed early in VM startup.
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
|