/*
 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// ... (earlier code elided) ...

    __ aesdec(xmm_result, xmm_key11);
    load_key(xmm_temp, key, 0xc0);
    __ aesdec(xmm_result, xmm_temp);
    load_key(xmm_temp, key, 0xd0);
    __ aesdec(xmm_result, xmm_temp);
    load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
    __ aesdec(xmm_result, xmm_temp);
    __ aesdeclast(xmm_result, xmm_key_last);       // xmm15 came from key+0
    __ pxor  (xmm_result, xmm_prev_block_cipher);  // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
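
  // Note (illustrative, not generated code): the loop above implements the
  // standard CBC decryption recurrence
  //
  //     P_i = D_K(C_i) ^ C_{i-1}        (C_0 = IV)
  //
  // which is why each freshly decrypted block is xor-ed with
  // xmm_prev_block_cipher (the previous ciphertext block, the "r vector")
  // before being stored to the output.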

  // byte swap x86 long (mask for pshufb: swaps the two 64-bit halves of a
  // 128-bit value; byte order within each half is preserved)
  address generate_ghash_long_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    address start = __ pc();
    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none);
    __ emit_data64(0x0706050403020100, relocInfo::none);
    return start;
  }

  // byte swap x86 byte array (mask for pshufb: reverses all 16 bytes)
  address generate_ghash_byte_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    address start = __ pc();
    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
    __ emit_data64(0x0001020304050607, relocInfo::none);
    return start;
  }
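
  // Note (illustrative, not generated code): the effect of pshufb with these
  // two masks can be written as plain index arithmetic. For each destination
  // byte, pshufb selects the source byte whose index is given by the
  // corresponding mask byte:
  //
  //     ghash_long_swap_mask:  out[i] = in[(i + 8) & 15]   // qword swap
  //     ghash_byte_swap_mask:  out[i] = in[15 - i]         // full reversal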

  /* Single and multi-block ghash operations */
  address generate_ghash_processBlocks() {
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state   = c_rarg0;
    const Register subkeyH = c_rarg1;
    const Register data    = c_rarg2;
    const Register blocks  = c_rarg3;

#ifdef _WIN64
    const int XMM_REG_LAST = 10;
#endif

    const XMMRegister xmm_temp0  = xmm0;
    const XMMRegister xmm_temp1  = xmm1;
    const XMMRegister xmm_temp2  = xmm2;
    const XMMRegister xmm_temp3  = xmm3;
    const XMMRegister xmm_temp4  = xmm4;
    const XMMRegister xmm_temp5  = xmm5;
    const XMMRegister xmm_temp6  = xmm6;
    const XMMRegister xmm_temp7  = xmm7;
    const XMMRegister xmm_temp8  = xmm8;
    const XMMRegister xmm_temp9  = xmm9;
    const XMMRegister xmm_temp10 = xmm10;

    __ enter();

#ifdef _WIN64
    // save xmm registers 6-10, which are callee-saved on Win64
    __ subptr(rsp, -rsp_after_call_off * wordSize);
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(xmm_save(i), as_XMMRegister(i));
    }
#endif

    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, xmm_temp10);

    __ BIND(L_ghash_loop);
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, xmm_temp10);

    __ pxor(xmm_temp0, xmm_temp2);

    //
    // Multiply with the hash key
    //
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1

    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0

    __ movdqu(xmm_temp5, xmm_temp4);    // copy xmm4 into xmm5
    __ psrldq(xmm_temp4, 8);            // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);            // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);      // register pair <xmm6:xmm3> holds the result
                                        // of the carry-less multiplication of
                                        // xmm0 by xmm1
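
    // Note (illustrative): with a = a1:a0 and b = b1:b0 split into 64-bit
    // halves, the four pclmulqdq results above combine by the carry-less
    // schoolbook rule
    //
    //     a * b = (a1*b1) << 128  ^  (a1*b0 ^ a0*b1) << 64  ^  (a0*b0)
    //
    // psrldq/pslldq split the middle term so it can be folded into the high
    // (xmm6) and low (xmm3) halves of the 256-bit product.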

    // We shift the result of the multiplication by one bit position to the
    // left to compensate for the fact that the bits are reversed.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp6);
    __ pslld(xmm_temp3, 1);
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);
    __ psrld(xmm_temp8, 31);
    __ movdqu(xmm_temp9, xmm_temp7);
    __ pslldq(xmm_temp8, 4);
    __ pslldq(xmm_temp7, 4);
    __ psrldq(xmm_temp9, 12);
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp8);
    __ por(xmm_temp6, xmm_temp9);
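
    // Note (illustrative): the 256-bit product in <xmm6:xmm3> is now reduced
    // modulo the GHASH polynomial g(x) = x^128 + x^7 + x^2 + x + 1. Because
    // GHASH operates on bit-reflected operands, the reduction below uses
    // mirrored shift amounts (31/30/25 in the first phase, 1/2/7 in the
    // second) rather than shifting by the polynomial's exponents directly.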

    //
    // First phase of the reduction
    //
    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
    // independently.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp8, xmm_temp3);
    __ movdqu(xmm_temp9, xmm_temp3);
    __ pslld(xmm_temp7, 31);    // packed left shift by 31 bits
    __ pslld(xmm_temp8, 30);    // packed left shift by 30 bits
    __ pslld(xmm_temp9, 25);    // packed left shift by 25 bits
    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp9);
    __ movdqu(xmm_temp8, xmm_temp7);
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp8, 4);
    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
    // shift operations.
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);     // packed right shift by 1 bit
    __ psrld(xmm_temp4, 2);     // packed right shift by 2 bits
    __ psrld(xmm_temp5, 7);     // packed right shift by 7 bits
    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp8);
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6

    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);    // carry the result into the next iteration
    __ addptr(data, 16);                // advance to the next 16-byte input block
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    __ pshufb(xmm_temp6, xmm_temp10);           // swap the result back into memory order
    __ movdqu(Address(state, 0), xmm_temp6);    // store the result

#ifdef _WIN64
    // restore xmm regs belonging to calling function
    for (int i = 6; i <= XMM_REG_LAST; i++) {
      __ movdqu(as_XMMRegister(i), xmm_save(i));
    }
#endif
    __ leave();
    __ ret(0);
    return start;
  }
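
  // Note (illustrative, not generated code): a minimal scalar sketch of what
  // the loop above computes, assuming a hypothetical gf128_mul() helper that
  // performs the carry-less multiplication and reduction shown above:
  //
  //     // state, subkeyH, data[i]: 128-bit values in GHASH bit order
  //     for (int i = 0; i < blocks; i++) {
  //       state = gf128_mul(state ^ data[i], subkeyH);  // X_i = (X_{i-1} ^ C_i) * H
  //     }
  //     // on exit the updated state is swapped back and stored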

  /**
   * Arguments:
   *
   * Inputs:
   *   c_rarg0   - int crc
   *   c_rarg1   - byte* buf
   *   c_rarg2   - int length
   *
   * Output:
   *   rax       - int crc result
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();
    // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)

    // ... (remainder of this stub and the stubs that follow elided;
    //      the lines below are from generate_all()) ...
    StubRoutines::x86::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
    StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
    StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    generate_math_stubs();

    // don't bother generating these AES intrinsic stubs unless global flag is set
    if (UseAESIntrinsics) {
      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others

      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
    }

    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                   &StubRoutines::_safefetch32_fault_pc,
                                                   &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
#endif
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();