< prev index next >

src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page




3755 
3756     __ addptr(rsp, 4 * wordSize);
3757 #ifdef _WIN64
3758     // restore xmm regs belonging to calling function
3759     __ movdqu(xmm6, Address(rsp, 0));
3760     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3761     __ addptr(rsp, 4 * wordSize);
3762 #endif
3763 
3764     __ leave();
3765     __ ret(0);
3766     return start;
3767   }
3768 
// Emits the 16-byte PSHUFB shuffle mask used to byte-reverse each 32-bit
// word of a 128-bit lane (big-/little-endian flip) when loading SHA-256
// message words.  This stub emits constant data, not executable code, and
// returns the address of the mask.
3769   address generate_pshuffle_byte_flip_mask() {
3770     __ align(64);  // align the constant to a 64-byte boundary
3771     StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
3772     address start = __ pc();
3773     __ emit_data64(0x0405060700010203, relocInfo::none);  // bytes 3,2,1,0 / 7,6,5,4 — reverse each dword
3774     __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);  // bytes 11,10,9,8 / 15,14,13,12
















3775     return start;
3776   }
3777 
3778 // ofs and limit are used for multi-block byte array.
3779 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
// Generates the SHA-256 compression stub using the x86 SHA extensions
// (via MacroAssembler::fast_sha256).  When multi_block is true the stub
// loops over the buffer from ofs up to limit (the multi-block intrinsic
// entry); otherwise it compresses a single 64-byte block.
//   c_rarg0 - buf:   input byte array address
//   c_rarg1 - state: SHA-256 state (int array) address
//   c_rarg2 - ofs:   starting offset (multi-block only)
//   c_rarg3 - limit: end offset (multi-block only)
3780   address generate_sha256_implCompress(bool multi_block, const char *name) {

3781     __ align(CodeEntryAlignment);
3782     StubCodeMark mark(this, "StubRoutines", name);
3783     address start = __ pc();
3784 
3785     Register buf = c_rarg0;
3786     Register state = c_rarg1;
3787     Register ofs = c_rarg2;
3788     Register limit = c_rarg3;
3789 
3790     const XMMRegister msg = xmm0;
3791     const XMMRegister state0 = xmm1;
3792     const XMMRegister state1 = xmm2;
3793     const XMMRegister msgtmp0 = xmm3;
3794 
3795     const XMMRegister msgtmp1 = xmm4;
3796     const XMMRegister msgtmp2 = xmm5;
3797     const XMMRegister msgtmp3 = xmm6;
3798     const XMMRegister msgtmp4 = xmm7;
3799 
3800     const XMMRegister shuf_mask = xmm8;
3801 
3802     __ enter();
3803 #ifdef _WIN64
3804     // save the xmm registers which must be preserved: xmm6-xmm8 are
3804     // callee-saved in the Win64 ABI and are clobbered below
3805     __ subptr(rsp, 6 * wordSize);
3806     __ movdqu(Address(rsp, 0), xmm6);
3807     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
3808     __ movdqu(Address(rsp, 4 * wordSize), xmm8);









3809 #endif
3810 
3811     __ subptr(rsp, 4 * wordSize);  // scratch area passed to fast_sha256 via rsp
3812 

3813     __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3814       buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3815 



3816     __ addptr(rsp, 4 * wordSize);  // release scratch area
3817 #ifdef _WIN64
3818     // restore xmm regs belonging to calling function
3819     __ movdqu(xmm6, Address(rsp, 0));
3820     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3821     __ movdqu(xmm8, Address(rsp, 4 * wordSize));
3822     __ addptr(rsp, 6 * wordSize);
3823 #endif
3824     __ leave();
3825     __ ret(0);
3826     return start;
3827   }
3828 
3829   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3830   // to hide instruction latency
3831   //
3832   // Arguments:
3833   //
3834   // Inputs:
3835   //   c_rarg0   - source byte array address
3836   //   c_rarg1   - destination byte array address
3837   //   c_rarg2   - K (key) in little endian int array
3838   //   c_rarg3   - counter vector byte array address


     // AES-ECB and AES-CBC stubs (AES-NI)
5201     if (UseAESIntrinsics) {
5202       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
5203       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
5204       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
5205       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
5206       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
5207     }
     // AES-CTR stub
5208     if (UseAESCTRIntrinsics){
5209       StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
5210       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
5211     }
5212 
     // SHA-1 masks and compression stubs (single- and multi-block entries)
5213     if (UseSHA1Intrinsics) {
5214       StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
5215       StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
5216       StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
5217       StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
5218     }
     // SHA-256 round-constant table address, masks, and compression stubs
5219     if (UseSHA256Intrinsics) {
5220       StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;







5221       StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5222       StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5223       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5224     }
5225 
5226     // Generate GHASH intrinsics code
5227     if (UseGHASHIntrinsics) {
5228       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5229       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5230       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5231     }
5232 
5233     // Safefetch stubs.
5234     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
5235                                                        &StubRoutines::_safefetch32_fault_pc,
5236                                                        &StubRoutines::_safefetch32_continuation_pc);
5237     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5238                                                        &StubRoutines::_safefetchN_fault_pc,
5239                                                        &StubRoutines::_safefetchN_continuation_pc);
5240 #ifdef COMPILER2




3755 
3756     __ addptr(rsp, 4 * wordSize);
3757 #ifdef _WIN64
3758     // restore xmm regs belonging to calling function
3759     __ movdqu(xmm6, Address(rsp, 0));
3760     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3761     __ addptr(rsp, 4 * wordSize);
3762 #endif
3763 
3764     __ leave();
3765     __ ret(0);
3766     return start;
3767   }
3768 
// Emits the PSHUFB byte-flip mask used to byte-reverse each 32-bit word
// when loading SHA-256 message words.  The first 16 bytes serve the
// 128-bit (SHA-extensions) path; when AVX2 is available the mask is
// widened to 256 bits (a second identical copy) and the _SHUF_00BA /
// _SHUF_DC00 shuffle masks used by the AVX2 SHA-256 implementation are
// appended.  Emits constant data only; returns the address of the data.
3769   address generate_pshuffle_byte_flip_mask() {
3770     __ align(64);  // align the constants to a 64-byte boundary
3771     StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
3772     address start = __ pc();
3773     __ emit_data64(0x0405060700010203, relocInfo::none);  // reverse bytes within each dword
3774     __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3775 
3776     if (VM_Version::supports_avx2()) {
3777       __ emit_data64(0x0405060700010203, relocInfo::none); // second copy
3778       __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3779       // _SHUF_00BA (0xFF lanes zero the destination byte under PSHUFB)
3780       __ emit_data64(0x0b0a090803020100, relocInfo::none);
3781       __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3782       __ emit_data64(0x0b0a090803020100, relocInfo::none);
3783       __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3784       // _SHUF_DC00
3785       __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3786       __ emit_data64(0x0b0a090803020100, relocInfo::none);
3787       __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
3788       __ emit_data64(0x0b0a090803020100, relocInfo::none);
3789     }
3790 
3791     return start;
3792   }
3793 
3794 // ofs and limit are used for multi-block byte array.
3795 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
// Generates the SHA-256 compression stub.  Dispatches at stub-generation
// time: uses the x86 SHA extensions (fast_sha256) when available,
// otherwise the AVX2 implementation (sha256_AVX2).  When multi_block is
// true the stub loops over the buffer from ofs up to limit; otherwise it
// compresses a single 64-byte block.
//   c_rarg0 - buf:   input byte array address
//   c_rarg1 - state: SHA-256 state (int array) address
//   c_rarg2 - ofs:   starting offset (multi-block only)
//   c_rarg3 - limit: end offset (multi-block only)
3796   address generate_sha256_implCompress(bool multi_block, const char *name) {
3797     assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "");
3798     __ align(CodeEntryAlignment);
3799     StubCodeMark mark(this, "StubRoutines", name);
3800     address start = __ pc();
3801 
3802     Register buf = c_rarg0;
3803     Register state = c_rarg1;
3804     Register ofs = c_rarg2;
3805     Register limit = c_rarg3;
3806 
3807     const XMMRegister msg = xmm0;
3808     const XMMRegister state0 = xmm1;
3809     const XMMRegister state1 = xmm2;
3810     const XMMRegister msgtmp0 = xmm3;
3811 
3812     const XMMRegister msgtmp1 = xmm4;
3813     const XMMRegister msgtmp2 = xmm5;
3814     const XMMRegister msgtmp3 = xmm6;
3815     const XMMRegister msgtmp4 = xmm7;
3816 
3817     const XMMRegister shuf_mask = xmm8;
3818 
3819     __ enter();
3820 #ifdef _WIN64
3821     // save the xmm registers which must be preserved: xmm6-xmm8 are
3821     // Win64 callee-saved and clobbered by both paths
3822     __ subptr(rsp, 6 * wordSize);
3823     __ movdqu(Address(rsp, 0), xmm6);
3824     __ movdqu(Address(rsp, 2 * wordSize), xmm7);
3825     __ movdqu(Address(rsp, 4 * wordSize), xmm8);
3826 
    // the AVX2 path additionally clobbers xmm9-xmm13, so save those too
3827     if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
3828       __ subptr(rsp, 10 * wordSize);
3829       __ movdqu(Address(rsp, 0), xmm9);
3830       __ movdqu(Address(rsp, 2 * wordSize), xmm10);
3831       __ movdqu(Address(rsp, 4 * wordSize), xmm11);
3832       __ movdqu(Address(rsp, 6 * wordSize), xmm12);
3833       __ movdqu(Address(rsp, 8 * wordSize), xmm13);
3834     }
3835 #endif
3836 
3837     __ subptr(rsp, 4 * wordSize);  // scratch area passed to the compressor via rsp
3838 
3839     if (VM_Version::supports_sha()) {
3840       __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3841         buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3842     } else if (VM_Version::supports_avx2()) {
3843       __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
3844         buf, state, ofs, limit, rsp, multi_block, shuf_mask);
3845     }
3846     __ addptr(rsp, 4 * wordSize);  // release scratch area
3847 #ifdef _WIN64
3848     // restore xmm regs belonging to calling function
    // (in reverse order of the saves: xmm9-13 were pushed last)
3849     if (!VM_Version::supports_sha() && VM_Version::supports_avx2()) {
3850       __ movdqu(xmm9, Address(rsp, 0));
3851       __ movdqu(xmm10, Address(rsp, 2 * wordSize));
3852       __ movdqu(xmm11, Address(rsp, 4 * wordSize));
3853       __ movdqu(xmm12, Address(rsp, 6 * wordSize));
3854       __ movdqu(xmm13, Address(rsp, 8 * wordSize));
3855       __ addptr(rsp, 10 * wordSize);
3856     }
3857     __ movdqu(xmm6, Address(rsp, 0));
3858     __ movdqu(xmm7, Address(rsp, 2 * wordSize));
3859     __ movdqu(xmm8, Address(rsp, 4 * wordSize));
3860     __ addptr(rsp, 6 * wordSize);
3861 #endif
3862     __ leave();
3863     __ ret(0);
3864     return start;
3865   }
3866 
3867   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3868   // to hide instruction latency
3869   //
3870   // Arguments:
3871   //
3872   // Inputs:
3873   //   c_rarg0   - source byte array address
3874   //   c_rarg1   - destination byte array address
3875   //   c_rarg2   - K (key) in little endian int array
3876   //   c_rarg3   - counter vector byte array address


     // AES-ECB and AES-CBC stubs (AES-NI)
5239     if (UseAESIntrinsics) {
5240       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
5241       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
5242       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
5243       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
5244       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
5245     }
     // AES-CTR stub
5246     if (UseAESCTRIntrinsics){
5247       StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
5248       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
5249     }
5250 
     // SHA-1 masks and compression stubs (single- and multi-block entries)
5251     if (UseSHA1Intrinsics) {
5252       StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
5253       StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
5254       StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
5255       StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
5256     }
     // SHA-256 round constants, masks, and compression stubs
5257     if (UseSHA256Intrinsics) {
5258       StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
      // Build _k256_W from _k256 by duplicating each 128-bit row into both
      // halves of a 256-bit row (16 rows x 16 bytes -> 16 rows x 32 bytes),
      // presumably so the AVX2 SHA-256 code can load the round constants as
      // 256-bit values — one copy per 128-bit lane.
5259       char* dst = (char*)StubRoutines::x86::_k256_W;
5260       char* src = (char*)StubRoutines::x86::_k256;
5261       for (int ii = 0; ii < 16; ++ii) {
5262         memcpy(dst + 32 * ii,      src + 16 * ii, 16);
5263         memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
5264       }
5265       StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
5266       StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5267       StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5268       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5269     }
5270 
5271     // Generate GHASH intrinsics code
5272     if (UseGHASHIntrinsics) {
5273       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5274       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5275       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5276     }
5277 
5278     // Safefetch stubs.
5279     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
5280                                                        &StubRoutines::_safefetch32_fault_pc,
5281                                                        &StubRoutines::_safefetch32_continuation_pc);
5282     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5283                                                        &StubRoutines::_safefetchN_fault_pc,
5284                                                        &StubRoutines::_safefetchN_continuation_pc);
5285 #ifdef COMPILER2


< prev index next >