< prev index next >

src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page




3678     __ aesdec(xmm_result, xmm_key11);
3679     load_key(xmm_temp, key, 0xc0);
3680     __ aesdec(xmm_result, xmm_temp);
3681     load_key(xmm_temp, key, 0xd0);
3682     __ aesdec(xmm_result, xmm_temp);
3683     load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
3684     __ aesdec(xmm_result, xmm_temp);
3685     __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
3686     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
3687     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
3688     // no need to store r to memory until we exit
3689     __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
3690     __ addptr(pos, AESBlockSize);
3691     __ subptr(len_reg, AESBlockSize);
3692     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
3693     __ jmp(L_exit);
3694 
3695     return start;
3696   }
3697 































































































































3698   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3699   // to hide instruction latency
3700   //
3701   // Arguments:
3702   //
3703   // Inputs:
3704   //   c_rarg0   - source byte array address
3705   //   c_rarg1   - destination byte array address
3706   //   c_rarg2   - K (key) in little endian int array
3707   //   c_rarg3   - counter vector byte array address
3708   //   Linux
3709   //     c_rarg4   -          input length
3710   //     c_rarg5   -          saved encryptedCounter start
3711   //     rbp + 6 * wordSize - saved used length
3712   //   Windows
3713   //     rbp + 6 * wordSize - input length
3714   //     rbp + 7 * wordSize - saved encryptedCounter start
3715   //     rbp + 8 * wordSize - saved used length
3716   //
3717   // Output:


4955 
4956     // support for verify_oop (must happen after universe_init)
4957     StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
4958 
4959     // arraycopy stubs used by compilers
4960     generate_arraycopy_stubs();
4961 
4962     generate_math_stubs();
4963 
4964     // don't bother generating these AES intrinsic stubs unless global flag is set
4965     if (UseAESIntrinsics) {
4966       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
4967       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
4968       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
4969       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
4970       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
4971     }
4972     if (UseAESCTRIntrinsics){
4973       StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
4974       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();













4975     }
4976 
4977     // Generate GHASH intrinsics code
4978     if (UseGHASHIntrinsics) {
4979       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
4980       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
4981       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
4982     }
4983 
4984     // Safefetch stubs.
4985     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
4986                                                        &StubRoutines::_safefetch32_fault_pc,
4987                                                        &StubRoutines::_safefetch32_continuation_pc);
4988     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
4989                                                        &StubRoutines::_safefetchN_fault_pc,
4990                                                        &StubRoutines::_safefetchN_continuation_pc);
4991 #ifdef COMPILER2
4992     if (UseMultiplyToLenIntrinsic) {
4993       StubRoutines::_multiplyToLen = generate_multiplyToLen();
4994     }




3678     __ aesdec(xmm_result, xmm_key11);
3679     load_key(xmm_temp, key, 0xc0);
3680     __ aesdec(xmm_result, xmm_temp);
3681     load_key(xmm_temp, key, 0xd0);
3682     __ aesdec(xmm_result, xmm_temp);
3683     load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
3684     __ aesdec(xmm_result, xmm_temp);
3685     __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
3686     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
3687     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
3688     // no need to store r to memory until we exit
3689     __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
3690     __ addptr(pos, AESBlockSize);
3691     __ subptr(len_reg, AESBlockSize);
3692     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
3693     __ jmp(L_exit);
3694 
3695     return start;
3696   }
3697 
3698   address generate_upper_word_mask() {
3699     __ align(64);
3700     StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
3701     address start = __ pc();
3702     __ emit_data64(0x0000000000000000, relocInfo::none);
3703     __ emit_data64(0xFFFFFFFF00000000, relocInfo::none);
3704     return start;
3705   }
3706 
3707   address generate_shuffle_byte_flip_mask() {
3708     __ align(64);
3709     StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
3710     address start = __ pc();
3711     __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
3712     __ emit_data64(0x0001020304050607, relocInfo::none);
3713     return start;
3714   }
3715 
  // ofs and limit are used for the multi-block byte array variant:
  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
  //
  // Generates the SHA-1 compression stub.  The actual round logic is
  // emitted by MacroAssembler::fast_sha1; multi_block selects the looping
  // variant driven by ofs/limit (single 64-byte block otherwise).
  address generate_sha1_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // Incoming Java arguments (same argument registers on Linux and Windows).
    Register buf = c_rarg0;      // source byte array address
    Register state = c_rarg1;    // SHA-1 state array address
    Register ofs = c_rarg2;      // starting offset (multi-block only)
    Register limit = c_rarg3;    // end offset (multi-block only)

    // xmm working set handed to fast_sha1.
    const XMMRegister abcd = xmm0;
    const XMMRegister e0 = xmm1;
    const XMMRegister e1 = xmm2;
    const XMMRegister msg0 = xmm3;

    const XMMRegister msg1 = xmm4;
    const XMMRegister msg2 = xmm5;
    const XMMRegister msg3 = xmm6;
    const XMMRegister shuf_mask = xmm7;

    __ enter();

#ifdef _WIN64
    // save the xmm registers which must be preserved 6-7
    // (xmm6-xmm15 are callee-saved in the Windows x64 ABI)
    __ subptr(rsp, 4 * wordSize);
    __ movdqu(Address(rsp, 0), xmm6);
    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif

    // Scratch area on the stack, addressed through rsp inside fast_sha1.
    __ subptr(rsp, 4 * wordSize);

    __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
      buf, state, ofs, limit, rsp, multi_block);

    __ addptr(rsp, 4 * wordSize);  // release the scratch area
#ifdef _WIN64
    // restore xmm regs belonging to calling function
    __ movdqu(xmm6, Address(rsp, 0));
    __ movdqu(xmm7, Address(rsp, 2 * wordSize));
    __ addptr(rsp, 4 * wordSize);
#endif

    __ leave();
    __ ret(0);
    return start;
  }
3764 
3765   address generate_pshuffle_byte_flip_mask() {
3766     __ align(64);
3767     StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
3768     address start = __ pc();
3769     __ emit_data64(0x0405060700010203, relocInfo::none);
3770     __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
3771     return start;
3772   }
3773 
// ofs and limit are used for the multi-block byte array variant:
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
//
// Generates the SHA-256 compression stub.  The actual round logic is
// emitted by MacroAssembler::fast_sha256; multi_block selects the looping
// variant driven by ofs/limit (single 64-byte block otherwise).
  address generate_sha256_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // Incoming Java arguments (same argument registers on Linux and Windows).
    Register buf = c_rarg0;      // source byte array address
    Register state = c_rarg1;    // SHA-256 state array address
    Register ofs = c_rarg2;      // starting offset (multi-block only)
    Register limit = c_rarg3;    // end offset (multi-block only)

    // xmm working set handed to fast_sha256.
    const XMMRegister msg = xmm0;
    const XMMRegister state0 = xmm1;
    const XMMRegister state1 = xmm2;
    const XMMRegister msgtmp0 = xmm3;

    const XMMRegister msgtmp1 = xmm4;
    const XMMRegister msgtmp2 = xmm5;
    const XMMRegister msgtmp3 = xmm6;
    const XMMRegister msgtmp4 = xmm7;

    const XMMRegister shuf_mask = xmm8;

    __ enter();
#ifdef _WIN64
    // save the xmm registers which must be preserved 6-7
    // (xmm6-xmm15 are callee-saved in the Windows x64 ABI; xmm8 is
    // also clobbered here, hence the 6-word save area)
    __ subptr(rsp, 6 * wordSize);
    __ movdqu(Address(rsp, 0), xmm6);
    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
    __ movdqu(Address(rsp, 4 * wordSize), xmm8);
#endif

    // Scratch area on the stack, addressed through rsp inside fast_sha256.
    __ subptr(rsp, 4 * wordSize);

    __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
      buf, state, ofs, limit, rsp, multi_block, shuf_mask);

    __ addptr(rsp, 4 * wordSize);  // release the scratch area
#ifdef _WIN64
    // restore xmm regs belonging to calling function
    __ movdqu(xmm6, Address(rsp, 0));
    __ movdqu(xmm7, Address(rsp, 2 * wordSize));
    __ movdqu(xmm8, Address(rsp, 4 * wordSize));
    __ addptr(rsp, 6 * wordSize);
#endif
    __ leave();
    __ ret(0);
    return start;
  }
3824 
3825   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
3826   // to hide instruction latency
3827   //
3828   // Arguments:
3829   //
3830   // Inputs:
3831   //   c_rarg0   - source byte array address
3832   //   c_rarg1   - destination byte array address
3833   //   c_rarg2   - K (key) in little endian int array
3834   //   c_rarg3   - counter vector byte array address
3835   //   Linux
3836   //     c_rarg4   -          input length
3837   //     c_rarg5   -          saved encryptedCounter start
3838   //     rbp + 6 * wordSize - saved used length
3839   //   Windows
3840   //     rbp + 6 * wordSize - input length
3841   //     rbp + 7 * wordSize - saved encryptedCounter start
3842   //     rbp + 8 * wordSize - saved used length
3843   //
3844   // Output:


5082 
5083     // support for verify_oop (must happen after universe_init)
5084     StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
5085 
5086     // arraycopy stubs used by compilers
5087     generate_arraycopy_stubs();
5088 
5089     generate_math_stubs();
5090 
5091     // don't bother generating these AES intrinsic stubs unless global flag is set
5092     if (UseAESIntrinsics) {
5093       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
5094       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
5095       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
5096       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
5097       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
5098     }
5099     if (UseAESCTRIntrinsics){
5100       StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
5101       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
5102     }
5103 
5104     if (UseSHA1Intrinsics) {
5105       StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
5106       StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
5107       StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
5108       StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
5109     }
5110     if (UseSHA256Intrinsics) {
5111       StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
5112       StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5113       StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5114       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5115     }
5116 
5117     // Generate GHASH intrinsics code
5118     if (UseGHASHIntrinsics) {
5119       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5120       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5121       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5122     }
5123 
5124     // Safefetch stubs.
5125     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
5126                                                        &StubRoutines::_safefetch32_fault_pc,
5127                                                        &StubRoutines::_safefetch32_continuation_pc);
5128     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5129                                                        &StubRoutines::_safefetchN_fault_pc,
5130                                                        &StubRoutines::_safefetchN_continuation_pc);
5131 #ifdef COMPILER2
5132     if (UseMultiplyToLenIntrinsic) {
5133       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5134     }


< prev index next >