3755
3756 __ addptr(rsp, 4 * wordSize);
3757 #ifdef _WIN64
3758 // restore xmm regs belonging to calling function
3759 __ movdqu(xmm6, Address(rsp, 0));
3760 __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3761 __ addptr(rsp, 4 * wordSize);
3762 #endif
3763
3764 __ leave();
3765 __ ret(0);
3766 return start;
3767 }
3768
3769 address generate_pshuffle_byte_flip_mask() {
3770 __ align(64);
3771 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
3772 address start = __ pc();
3773 __ emit_data64(0x0405060700010203, relocInfo::none);
3774 __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3775 return start;
3776 }
3777
3778 // ofs and limit are used for multi-block byte array.
3779 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
// Generates the SHA-256 compression stub.
//   multi_block == false: compress a single 64-byte block (implCompress).
//   multi_block == true : loop over blocks in [ofs, limit) (implCompressMultiBlock).
// Arguments arrive in the platform calling convention registers:
//   c_rarg0 = buf (message bytes), c_rarg1 = state (8 x 32-bit SHA-256 state),
//   c_rarg2 = ofs, c_rarg3 = limit (only meaningful when multi_block).
// The actual round logic lives in MacroAssembler::fast_sha256 (not visible
// here); this wrapper only sets up registers and stack scratch space.
3780 address generate_sha256_implCompress(bool multi_block, const char *name) {
3781 __ align(CodeEntryAlignment);
3782 StubCodeMark mark(this, "StubRoutines", name);
3783 address start = __ pc();
3784
3785 Register buf = c_rarg0;
3786 Register state = c_rarg1;
3787 Register ofs = c_rarg2;
3788 Register limit = c_rarg3;
3789
// xmm0-xmm5 are scratch; xmm6-xmm8 are callee-saved under the Win64 ABI
// and are therefore spilled below before use.
3790 const XMMRegister msg = xmm0;
3791 const XMMRegister state0 = xmm1;
3792 const XMMRegister state1 = xmm2;
3793 const XMMRegister msgtmp0 = xmm3;
3794
3795 const XMMRegister msgtmp1 = xmm4;
3796 const XMMRegister msgtmp2 = xmm5;
3797 const XMMRegister msgtmp3 = xmm6;
3798 const XMMRegister msgtmp4 = xmm7;
3799
3800 const XMMRegister shuf_mask = xmm8;
3801
3802 __ enter();
3803 #ifdef _WIN64
3804 // save the xmm registers which must be preserved 6-8
// Three 16-byte registers = 48 bytes = 6 words of stack.
3805 __ subptr(rsp, 6 * wordSize);
3806 __ movdqu(Address(rsp, 0), xmm6);
3807 __ movdqu(Address(rsp, 2 * wordSize), xmm7);
3808 __ movdqu(Address(rsp, 4 * wordSize), xmm8);
3809 #endif
3810
// Reserve 4 words of scratch; rsp is handed to fast_sha256 below.
3811 __ subptr(rsp, 4 * wordSize);
3812
3813 __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3814 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3815
// Release scratch, then restore the Win64 callee-saved xmm registers in
// the reverse order of the spill above.
3816 __ addptr(rsp, 4 * wordSize);
3817 #ifdef _WIN64
3818 // restore xmm regs belonging to calling function
3819 __ movdqu(xmm6, Address(rsp, 0));
3820 __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3821 __ movdqu(xmm8, Address(rsp, 4 * wordSize));
3822 __ addptr(rsp, 6 * wordSize);
3823 #endif
3824 __ leave();
3825 __ ret(0);
3826 return start;
3827 }
3828
3829 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3830 // to hide instruction latency
3831 //
3832 // Arguments:
3833 //
3834 // Inputs:
3835 // c_rarg0 - source byte array address
3836 // c_rarg1 - destination byte array address
3837 // c_rarg2 - K (key) in little endian int array
3838 // c_rarg3 - counter vector byte array address
// Registration of crypto intrinsic stubs. Each group is generated only
// when the corresponding UseXXXIntrinsics flag is enabled; the mask/table
// stubs are generated first because the crypt stubs reference them.
5201 if (UseAESIntrinsics) {
5202 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
5203 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
5204 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
5205 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
5206 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
5207 }
5208 if (UseAESCTRIntrinsics){
5209 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
5210 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
5211 }
5212
5213 if (UseSHA1Intrinsics) {
5214 StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
5215 StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
5216 StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
5217 StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
5218 }
5219 if (UseSHA256Intrinsics) {
// _k256 is the static SHA-256 round-constant (K) table; publish its address
// for the stub code, then generate the byte-flip mask and compress stubs.
5220 StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
5221 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5222 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5223 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5224 }
5225
5226 // Generate GHASH intrinsics code
5227 if (UseGHASHIntrinsics) {
5228 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5229 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5230 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5231 }
5232
5233 // Safefetch stubs.
5234 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
5235 &StubRoutines::_safefetch32_fault_pc,
5236 &StubRoutines::_safefetch32_continuation_pc);
5237 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5238 &StubRoutines::_safefetchN_fault_pc,
5239 &StubRoutines::_safefetchN_continuation_pc);
5240 #ifdef COMPILER2
|
3755
3756 __ addptr(rsp, 4 * wordSize);
3757 #ifdef _WIN64
3758 // restore xmm regs belonging to calling function
3759 __ movdqu(xmm6, Address(rsp, 0));
3760 __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3761 __ addptr(rsp, 4 * wordSize);
3762 #endif
3763
3764 __ leave();
3765 __ ret(0);
3766 return start;
3767 }
3768
// Generates the constant table used by the SHA-256 stubs to byte-swap
// message words (flips byte order within each 32-bit word). Returns the
// address of the start of the table.
3769 address generate_pshuffle_byte_flip_mask() {
3770 __ align(64);
3771 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
3772 address start = __ pc();
// Byte-flip mask: maps bytes {3,2,1,0} -> {0,1,2,3} within each dword.
3773 __ emit_data64(0x0405060700010203, relocInfo::none);
3774 __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3775
// The AVX2 SHA-256 path needs a 256-bit (ymm-wide) mask plus two extra
// shuffle constants, so duplicate the mask into the upper 128-bit lane
// and append _SHUF_00BA / _SHUF_DC00 immediately after it.
3776 if (VM_Version::supports_avx2()) {
3777 __ emit_data64(0x0405060700010203, relocInfo::none); // second copy for the upper ymm lane
3778 __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3779 // _SHUF_00BA
3780 __ emit_data64(0x0b0a090803020100, relocInfo::none);
3781 __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3782 __ emit_data64(0x0b0a090803020100, relocInfo::none);
3783 __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3784 // _SHUF_DC00
3785 __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3786 __ emit_data64(0x0b0a090803020100, relocInfo::none);
3787 __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3788 __ emit_data64(0x0b0a090803020100, relocInfo::none);
3789 }
3790
3791 return start;
3792 }
3793
3794 // ofs and limit are used for multi-block byte array.
3795 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
// Generates the SHA-256 compression stub.
//   multi_block == false: compress a single 64-byte block (implCompress).
//   multi_block == true : loop over blocks in [ofs, limit) (implCompressMultiBlock).
// Two code paths: fast_sha256 when the CPU has the SHA extensions,
// otherwise sha256_AVX2 (both are MacroAssembler helpers; bodies not
// visible here). The assert below guarantees at least one path applies.
3796 address generate_sha256_implCompress(bool multi_block, const char *name) {
3797 assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "");
3798 __ align(CodeEntryAlignment);
3799 StubCodeMark mark(this, "StubRoutines", name);
3800 address start = __ pc();
3801
3802 Register buf = c_rarg0;
3803 Register state = c_rarg1;
3804 Register ofs = c_rarg2;
3805 Register limit = c_rarg3;
3806
// xmm0-xmm5 are scratch; xmm6-xmm8 (and xmm9-xmm13 on the AVX2 path) are
// callee-saved under the Win64 ABI and are spilled below before use.
3807 const XMMRegister msg = xmm0;
3808 const XMMRegister state0 = xmm1;
3809 const XMMRegister state1 = xmm2;
3810 const XMMRegister msgtmp0 = xmm3;
3811
3812 const XMMRegister msgtmp1 = xmm4;
3813 const XMMRegister msgtmp2 = xmm5;
3814 const XMMRegister msgtmp3 = xmm6;
3815 const XMMRegister msgtmp4 = xmm7;
3816
3817 const XMMRegister shuf_mask = xmm8;
3818
3819 __ enter();
3820 #ifdef _WIN64
3821 // save the xmm registers which must be preserved 6-8 (and 9-13 on the AVX2 path)
// Three 16-byte registers = 48 bytes = 6 words of stack.
3822 __ subptr(rsp, 6 * wordSize);
3823 __ movdqu(Address(rsp, 0), xmm6);
3824 __ movdqu(Address(rsp, 2 * wordSize), xmm7);
3825 __ movdqu(Address(rsp, 4 * wordSize), xmm8);
3826
// The AVX2 fallback additionally clobbers xmm9-xmm13, so spill those too
// (five 16-byte registers = 10 words). This condition mirrors the
// dispatch below: AVX2 is only used when SHA extensions are absent.
3827 if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
3828 __ subptr(rsp, 10 * wordSize);
3829 __ movdqu(Address(rsp, 0), xmm9);
3830 __ movdqu(Address(rsp, 2 * wordSize), xmm10);
3831 __ movdqu(Address(rsp, 4 * wordSize), xmm11);
3832 __ movdqu(Address(rsp, 6 * wordSize), xmm12);
3833 __ movdqu(Address(rsp, 8 * wordSize), xmm13);
3834 }
3835 #endif
3836
// Reserve 4 words of scratch; rsp is handed to the compress helper below.
3837 __ subptr(rsp, 4 * wordSize);
3838
// Prefer the dedicated SHA instructions; fall back to AVX2 otherwise.
3839 if (VM_Version::supports_sha()) {
3840 __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3841 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3842 } else if (VM_Version::supports_avx2()) {
3843 __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3844 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3845 }
// Release scratch, then unwind the spills in reverse order: xmm9-13 first
// (innermost spill, conditional on the same AVX2 predicate), then xmm6-8.
3846 __ addptr(rsp, 4 * wordSize);
3847 #ifdef _WIN64
3848 // restore xmm regs belonging to calling function
3849 if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
3850 __ movdqu(xmm9, Address(rsp, 0));
3851 __ movdqu(xmm10, Address(rsp, 2 * wordSize));
3852 __ movdqu(xmm11, Address(rsp, 4 * wordSize));
3853 __ movdqu(xmm12, Address(rsp, 6 * wordSize));
3854 __ movdqu(xmm13, Address(rsp, 8 * wordSize));
3855 __ addptr(rsp, 10 * wordSize);
3856 }
3857 __ movdqu(xmm6, Address(rsp, 0));
3858 __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3859 __ movdqu(xmm8, Address(rsp, 4 * wordSize));
3860 __ addptr(rsp, 6 * wordSize);
3861 #endif
3862 __ leave();
3863 __ ret(0);
3864 return start;
3865 }
3866
3867 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3868 // to hide instruction latency
3869 //
3870 // Arguments:
3871 //
3872 // Inputs:
3873 // c_rarg0 - source byte array address
3874 // c_rarg1 - destination byte array address
3875 // c_rarg2 - K (key) in little endian int array
3876 // c_rarg3 - counter vector byte array address
// Registration of crypto intrinsic stubs. Each group is generated only
// when the corresponding UseXXXIntrinsics flag is enabled; the mask/table
// stubs are generated first because the crypt stubs reference them.
5239 if (UseAESIntrinsics) {
5240 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
5241 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
5242 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
5243 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
5244 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
5245 }
5246 if (UseAESCTRIntrinsics){
5247 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
5248 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
5249 }
5250
5251 if (UseSHA1Intrinsics) {
5252 StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
5253 StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
5254 StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
5255 StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
5256 }
5257 if (UseSHA256Intrinsics) {
// _k256 is the static SHA-256 round-constant (K) table (16 rows of 16
// bytes = 64 x 32-bit constants); publish its address for the stub code.
5258 StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
// Build _k256_W for the AVX2 path: duplicate each 16-byte row of _k256
// into both 128-bit halves of a 32-byte row, giving a ymm-friendly layout.
5259 char* dst = (char*)StubRoutines::x86::_k256_W;
5260 char* src = (char*)StubRoutines::x86::_k256;
5261 for (int ii = 0; ii < 16; ++ii) {
5262 memcpy(dst + 32 * ii, src + 16 * ii, 16);
5263 memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
5264 }
5265 StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
5266 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5267 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5268 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5269 }
5270
5271 // Generate GHASH intrinsics code
5272 if (UseGHASHIntrinsics) {
5273 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5274 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5275 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5276 }
5277
5278 // Safefetch stubs.
5279 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
5280 &StubRoutines::_safefetch32_fault_pc,
5281 &StubRoutines::_safefetch32_continuation_pc);
5282 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5283 &StubRoutines::_safefetchN_fault_pc,
5284 &StubRoutines::_safefetchN_continuation_pc);
5285 #ifdef COMPILER2
|