3678 __ aesdec(xmm_result, xmm_key11); 3679 load_key(xmm_temp, key, 0xc0); 3680 __ aesdec(xmm_result, xmm_temp); 3681 load_key(xmm_temp, key, 0xd0); 3682 __ aesdec(xmm_result, xmm_temp); 3683 load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0 3684 __ aesdec(xmm_result, xmm_temp); 3685 __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 3686 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector 3687 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3688 // no need to store r to memory until we exit 3689 __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block 3690 __ addptr(pos, AESBlockSize); 3691 __ subptr(len_reg, AESBlockSize); 3692 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); 3693 __ jmp(L_exit); 3694 3695 return start; 3696 } 3697 3698 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time 3699 // to hide instruction latency 3700 // 3701 // Arguments: 3702 // 3703 // Inputs: 3704 // c_rarg0 - source byte array address 3705 // c_rarg1 - destination byte array address 3706 // c_rarg2 - K (key) in little endian int array 3707 // c_rarg3 - counter vector byte array address 3708 // Linux 3709 // c_rarg4 - input length 3710 // c_rarg5 - saved encryptedCounter start 3711 // rbp + 6 * wordSize - saved used length 3712 // Windows 3713 // rbp + 6 * wordSize - input length 3714 // rbp + 7 * wordSize - saved encryptedCounter start 3715 // rbp + 8 * wordSize - saved used length 3716 // 3717 // Output: 4955 4956 // support for verify_oop (must happen after universe_init) 4957 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); 4958 4959 // arraycopy stubs used by compilers 4960 generate_arraycopy_stubs(); 4961 4962 generate_math_stubs(); 4963 4964 // don't bother generating these AES intrinsic stubs unless global flag is set 4965 if (UseAESIntrinsics) { 4966 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others 4967 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); 4968 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); 4969 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); 4970 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); 4971 } 4972 if (UseAESCTRIntrinsics){ 4973 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask(); 4974 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); 4975 } 4976 4977 // Generate GHASH intrinsics code 4978 if (UseGHASHIntrinsics) { 4979 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); 4980 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 4981 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 4982 } 4983 4984 // Safefetch stubs. 4985 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 4986 &StubRoutines::_safefetch32_fault_pc, 4987 &StubRoutines::_safefetch32_continuation_pc); 4988 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 4989 &StubRoutines::_safefetchN_fault_pc, 4990 &StubRoutines::_safefetchN_continuation_pc); 4991 #ifdef COMPILER2 4992 if (UseMultiplyToLenIntrinsic) { 4993 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 4994 } | 3678 __ aesdec(xmm_result, xmm_key11); 3679 load_key(xmm_temp, key, 0xc0); 3680 __ aesdec(xmm_result, xmm_temp); 3681 load_key(xmm_temp, key, 0xd0); 3682 __ aesdec(xmm_result, xmm_temp); 3683 load_key(xmm_temp, key, 0xe0); // 256-bit key goes up to e0 3684 __ aesdec(xmm_result, xmm_temp); 3685 __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 3686 __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector 3687 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 3688 // no need to store r to memory until we exit 3689 __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block 3690 __ addptr(pos, AESBlockSize); 3691 __ subptr(len_reg, AESBlockSize); 3692 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); 3693 __ jmp(L_exit); 3694 3695 return start; 3696 } 3697 3698 address generate_upper_word_mask() { 3699 __ align(64); 3700 StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); 3701 address start = __ pc(); 3702 __ emit_data64(0x0000000000000000, relocInfo::none); 3703 __ emit_data64(0xFFFFFFFF00000000, relocInfo::none); 3704 return start; 3705 } 3706 3707 address generate_shuffle_byte_flip_mask() { 3708 __ align(64); 3709 StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); 3710 address start = __ pc(); 3711 __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); 3712 __ emit_data64(0x0001020304050607, relocInfo::none); 3713 return start; 3714 } 3715 3716 // ofs and limit are use for multi-block byte array. 3717 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) 3718 address generate_sha1_implCompress(bool multi_block, const char *name) { 3719 __ align(CodeEntryAlignment); 3720 StubCodeMark mark(this, "StubRoutines", name); 3721 address start = __ pc(); 3722 3723 Register buf = c_rarg0; 3724 Register state = c_rarg1; 3725 Register ofs = c_rarg2; 3726 Register limit = c_rarg3; 3727 3728 const XMMRegister abcd = xmm0; 3729 const XMMRegister e0 = xmm1; 3730 const XMMRegister e1 = xmm2; 3731 const XMMRegister msg0 = xmm3; 3732 3733 const XMMRegister msg1 = xmm4; 3734 const XMMRegister msg2 = xmm5; 3735 const XMMRegister msg3 = xmm6; 3736 const XMMRegister shuf_mask = xmm7; 3737 3738 __ enter(); 3739 3740 #ifdef _WIN64 3741 // save the xmm registers which must be preserved 6-7 3742 __ subptr(rsp, 4 * wordSize); 3743 __ movdqu(Address(rsp, 0), xmm6); 3744 __ movdqu(Address(rsp, 2 * wordSize), xmm7); 3745 #endif 3746 3747 __ subptr(rsp, 4 * wordSize); 3748 3749 __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, 3750 buf, state, ofs, limit, rsp, multi_block); 3751 3752 __ addptr(rsp, 4 * wordSize); 3753 #ifdef _WIN64 3754 // restore xmm regs belonging to calling function 3755 __ movdqu(xmm6, Address(rsp, 0)); 3756 __ movdqu(xmm7, Address(rsp, 2 * wordSize)); 3757 __ addptr(rsp, 4 * wordSize); 3758 #endif 3759 3760 __ leave(); 3761 __ ret(0); 3762 return start; 3763 } 3764 3765 address generate_pshuffle_byte_flip_mask() { 3766 __ align(64); 3767 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); 3768 address start = __ pc(); 3769 __ emit_data64(0x0405060700010203, relocInfo::none); 3770 __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); 3771 return start; 3772 } 3773 3774 // ofs and limit are use for multi-block byte array. 3775 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) 3776 address generate_sha256_implCompress(bool multi_block, const char *name) { 3777 __ align(CodeEntryAlignment); 3778 StubCodeMark mark(this, "StubRoutines", name); 3779 address start = __ pc(); 3780 3781 Register buf = c_rarg0; 3782 Register state = c_rarg1; 3783 Register ofs = c_rarg2; 3784 Register limit = c_rarg3; 3785 3786 const XMMRegister msg = xmm0; 3787 const XMMRegister state0 = xmm1; 3788 const XMMRegister state1 = xmm2; 3789 const XMMRegister msgtmp0 = xmm3; 3790 3791 const XMMRegister msgtmp1 = xmm4; 3792 const XMMRegister msgtmp2 = xmm5; 3793 const XMMRegister msgtmp3 = xmm6; 3794 const XMMRegister msgtmp4 = xmm7; 3795 3796 const XMMRegister shuf_mask = xmm8; 3797 3798 __ enter(); 3799 #ifdef _WIN64 3800 // save the xmm registers which must be preserved 6-7 3801 __ subptr(rsp, 6 * wordSize); 3802 __ movdqu(Address(rsp, 0), xmm6); 3803 __ movdqu(Address(rsp, 2 * wordSize), xmm7); 3804 __ movdqu(Address(rsp, 4 * wordSize), xmm8); 3805 #endif 3806 3807 __ subptr(rsp, 4 * wordSize); 3808 3809 __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, 3810 buf, state, ofs, limit, rsp, multi_block, shuf_mask); 3811 3812 __ addptr(rsp, 4 * wordSize); 3813 #ifdef _WIN64 3814 // restore xmm regs belonging to calling function 3815 __ movdqu(xmm6, Address(rsp, 0)); 3816 __ movdqu(xmm7, Address(rsp, 2 * wordSize)); 3817 __ movdqu(xmm8, Address(rsp, 4 * wordSize)); 3818 __ addptr(rsp, 6 * wordSize); 3819 #endif 3820 __ leave(); 3821 __ ret(0); 3822 return start; 3823 } 3824 3825 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time 3826 // to hide instruction latency 3827 // 3828 // Arguments: 3829 // 3830 // Inputs: 3831 // c_rarg0 - source byte array address 3832 // c_rarg1 - destination byte array address 3833 // c_rarg2 - K (key) in little endian int array 3834 // c_rarg3 - counter vector byte array address 3835 // Linux 3836 // c_rarg4 - input length 3837 // c_rarg5 - saved encryptedCounter start 3838 // rbp + 6 * wordSize - saved used length 3839 // Windows 3840 // rbp + 6 * wordSize - input length 3841 // rbp + 7 * wordSize - saved encryptedCounter start 3842 // rbp + 8 * wordSize - saved used length 3843 // 3844 // Output: 5082 5083 // support for verify_oop (must happen after universe_init) 5084 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); 5085 5086 // arraycopy stubs used by compilers 5087 generate_arraycopy_stubs(); 5088 5089 generate_math_stubs(); 5090 5091 // don't bother generating these AES intrinsic stubs unless global flag is set 5092 if (UseAESIntrinsics) { 5093 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others 5094 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); 5095 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); 5096 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); 5097 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); 5098 } 5099 if (UseAESCTRIntrinsics){ 5100 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask(); 5101 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); 5102 } 5103 5104 if (UseSHA1Intrinsics) { 5105 StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask(); 5106 StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask(); 5107 StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); 5108 StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); 5109 } 5110 if (UseSHA256Intrinsics) { 5111 StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256; 5112 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); 5113 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); 5114 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); 5115 } 5116 5117 // Generate GHASH intrinsics code 5118 if (UseGHASHIntrinsics) { 5119 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); 5120 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 5121 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 5122 } 5123 5124 // Safefetch stubs. 5125 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 5126 &StubRoutines::_safefetch32_fault_pc, 5127 &StubRoutines::_safefetch32_continuation_pc); 5128 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 5129 &StubRoutines::_safefetchN_fault_pc, 5130 &StubRoutines::_safefetchN_continuation_pc); 5131 #ifdef COMPILER2 5132 if (UseMultiplyToLenIntrinsic) { 5133 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 5134 } |