
src/cpu/x86/vm/stubGenerator_x86_64.cpp



   1 /*
   2  * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


3646     __ aesdec(xmm_result, xmm_key11);
3647     load_key(xmm_temp, key, 0xc0);
3648     __ aesdec(xmm_result, xmm_temp);
3649     load_key(xmm_temp, key, 0xd0);
3650     __ aesdec(xmm_result, xmm_temp);
3651     load_key(xmm_temp, key, 0xe0);     // 256-bit key goes up to e0
3652     __ aesdec(xmm_result, xmm_temp);
3653     __ aesdeclast(xmm_result, xmm_key_last);          // xmm15 came from key+0
3654     __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
3655     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);  // store into the next 16 bytes of output
3656     // no need to store r to memory until we exit
3657     __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);  // set up next r vector with cipher input from this block
3658     __ addptr(pos, AESBlockSize);
3659     __ subptr(len_reg, AESBlockSize);
3660     __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
3661     __ jmp(L_exit);
3662 
3663     return start;
3664   }
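
The loop tail above finishes one block of AES-256 CBC decryption: the aesdec chain consumes round keys up to offset 0xe0 (AES-256 has 14 rounds), and the pxor folds in the CBC chaining value (the r vector). A minimal C++ sketch of the same recurrence, with a hypothetical aes_decrypt_block() standing in for the aesdec/aesdeclast chain:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Hypothetical helper: the raw AES block decryption done by the aesdec chain.
void aes_decrypt_block(const void* expanded_key, const uint8_t in[16], uint8_t out[16]);

void cbc_decrypt(const void* key, const uint8_t* in, uint8_t* out,
                 size_t blocks, uint8_t r[16]) {
  uint8_t tmp[16];
  for (size_t i = 0; i < blocks; i++, in += 16, out += 16) {
    aes_decrypt_block(key, in, tmp);   // xmm_result after aesdeclast
    for (int j = 0; j < 16; j++)
      out[j] = tmp[j] ^ r[j];          // pxor with xmm_prev_block_cipher
    memcpy(r, in, 16);                 // this block's ciphertext is the next r
  }                                    // r is stored back to memory only at L_exit
}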
3665 
3666 
3667   // byte swap mask for a 128-bit value held as a pair of x86 longs
3668   address generate_ghash_long_swap_mask() {
3669     __ align(CodeEntryAlignment);
3670     StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
3671     address start = __ pc();
3672     __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none);
3673     __ emit_data64(0x0706050403020100, relocInfo::none);
3674     return start;
3675   }
3676 
3677   // byte swap mask for a 16-byte x86 byte array
3678   address generate_ghash_byte_swap_mask() {
3679     __ align(CodeEntryAlignment);
3680     StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
3681     address start = __ pc();
3682     __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
3683     __ emit_data64(0x0001020304050607, relocInfo::none);
3684     return start;
3685   }
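
Both stubs above only materialize 16-byte PSHUFB control masks (the constants read reversed because emit_data64 stores them little-endian). A minimal model of the byte-selection semantics they rely on: the long-swap mask [8..15, 0..7] exchanges the two 64-bit halves, while the byte-swap mask [15..0] reverses all 16 bytes.

#include <stdint.h>

// Model of PSHUFB: out[i] = in[mask[i] & 15], or 0 if the mask byte's top bit is set.
void pshufb_model(uint8_t out[16], const uint8_t in[16], const uint8_t mask[16]) {
  for (int i = 0; i < 16; i++)
    out[i] = (mask[i] & 0x80) ? 0 : in[mask[i] & 0x0f];
}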
3686 
3687   /* Single and multi-block ghash operations */
3688   address generate_ghash_processBlocks() {
3689     __ align(CodeEntryAlignment);
3690     Label L_ghash_loop, L_exit;
3691     StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
3692     address start = __ pc();
3693 
3694     const Register state        = c_rarg0;
3695     const Register subkeyH      = c_rarg1;
3696     const Register data         = c_rarg2;
3697     const Register blocks       = c_rarg3;
3698 
3699 #ifdef _WIN64
3700     const int XMM_REG_LAST  = 10;
3701 #endif
3702 
3703     const XMMRegister xmm_temp0 = xmm0;
3704     const XMMRegister xmm_temp1 = xmm1;
3705     const XMMRegister xmm_temp2 = xmm2;
3706     const XMMRegister xmm_temp3 = xmm3;
3707     const XMMRegister xmm_temp4 = xmm4;
3708     const XMMRegister xmm_temp5 = xmm5;
3709     const XMMRegister xmm_temp6 = xmm6;
3710     const XMMRegister xmm_temp7 = xmm7;
3711     const XMMRegister xmm_temp8 = xmm8;
3712     const XMMRegister xmm_temp9 = xmm9;
3713     const XMMRegister xmm_temp10 = xmm10;
3714 
3715     __ enter();
3716 
3717 #ifdef _WIN64
3718     // save xmm registers 6-10, the callee-saved ones this stub clobbers
3719     __ subptr(rsp, -rsp_after_call_off * wordSize);
3720     for (int i = 6; i <= XMM_REG_LAST; i++) {
3721       __ movdqu(xmm_save(i), as_XMMRegister(i));
3722     }
3723 #endif
3724 
3725     __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
3726 
3727     __ movdqu(xmm_temp0, Address(state, 0));
3728     __ pshufb(xmm_temp0, xmm_temp10);
3729 
3730 
3731     __ BIND(L_ghash_loop);
3732     __ movdqu(xmm_temp2, Address(data, 0));
3733     __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
3734 
3735     __ movdqu(xmm_temp1, Address(subkeyH, 0));
3736     __ pshufb(xmm_temp1, xmm_temp10);
3737 
3738     __ pxor(xmm_temp0, xmm_temp2);      // xor the input block into the current state
3739 
3740     //
3741     // Multiply with the hash key
3742     //
3743     __ movdqu(xmm_temp3, xmm_temp0);
3744     __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
3745     __ movdqu(xmm_temp4, xmm_temp0);
3746     __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
3747 
3748     __ movdqu(xmm_temp5, xmm_temp0);
3749     __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
3750     __ movdqu(xmm_temp6, xmm_temp0);
3751     __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
3752 
3753     __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
3754 
3755     __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
3756     __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 64 bits
3757     __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 64 bits
3758     __ pxor(xmm_temp3, xmm_temp5);
3759     __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
3760                                 // of the carry-less multiplication of
3761                                 // xmm0 by xmm1.
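                                // Schoolbook decomposition: writing a = a1:a0 and
                                // b = b1:b0 in 64-bit halves, the carry-less product
                                // is a*b = (a1*b1 << 128) ^ ((a0*b1 ^ a1*b0) << 64)
                                // ^ (a0*b0); the psrldq/pslldq/pxor sequence above
                                // folds the middle term into the outer two.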
3762 
3763     // We shift the result of the multiplication by one bit position
3764     // to the left to account for the fact that the bits are reversed.
3765     __ movdqu(xmm_temp7, xmm_temp3);
3766     __ movdqu(xmm_temp8, xmm_temp6);
3767     __ pslld(xmm_temp3, 1);
3768     __ pslld(xmm_temp6, 1);
3769     __ psrld(xmm_temp7, 31);
3770     __ psrld(xmm_temp8, 31);
3771     __ movdqu(xmm_temp9, xmm_temp7);
3772     __ pslldq(xmm_temp8, 4);
3773     __ pslldq(xmm_temp7, 4);
3774     __ psrldq(xmm_temp9, 12);
3775     __ por(xmm_temp3, xmm_temp7);
3776     __ por(xmm_temp6, xmm_temp8);
3777     __ por(xmm_temp6, xmm_temp9);
3778 
3779     //
3780     // First phase of the reduction
3781     //
3782     // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
3783     // independently.
3784     __ movdqu(xmm_temp7, xmm_temp3);
3785     __ movdqu(xmm_temp8, xmm_temp3);
3786     __ movdqu(xmm_temp9, xmm_temp3);
3787     __ pslld(xmm_temp7, 31);    // packed left shift << 31
3788     __ pslld(xmm_temp8, 30);    // packed left shift << 30
3789     __ pslld(xmm_temp9, 25);    // packed left shift << 25
3790     __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
3791     __ pxor(xmm_temp7, xmm_temp9);
3792     __ movdqu(xmm_temp8, xmm_temp7);
3793     __ pslldq(xmm_temp7, 12);
3794     __ psrldq(xmm_temp8, 4);
3795     __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
3796 
3797     //
3798     // Second phase of the reduction
3799     //
3800     // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
3801     // shift operations.
3802     __ movdqu(xmm_temp2, xmm_temp3);
3803     __ movdqu(xmm_temp4, xmm_temp3);
3804     __ movdqu(xmm_temp5, xmm_temp3);
3805     __ psrld(xmm_temp2, 1);     // packed right shift >> 1
3806     __ psrld(xmm_temp4, 2);     // packed right shift >> 2
3807     __ psrld(xmm_temp5, 7);     // packed right shift >> 7
3808     __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
3809     __ pxor(xmm_temp2, xmm_temp5);
3810     __ pxor(xmm_temp2, xmm_temp8);
3811     __ pxor(xmm_temp3, xmm_temp2);
3812     __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
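                                // Together the two phases reduce the 256-bit product
                                // modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1;
                                // the shift counts 31/30/25 and 1/2/7 are the
                                // bit-reflected images of its low terms x, x^2 and x^7.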
3813 
3814     __ decrement(blocks);
3815     __ jcc(Assembler::zero, L_exit);
3816     __ movdqu(xmm_temp0, xmm_temp6);
3817     __ addptr(data, 16);
3818     __ jmp(L_ghash_loop);
3819 
3820     __ BIND(L_exit);
3821     __ pshufb(xmm_temp6, xmm_temp10);           // Byte swap 16-byte result
3822     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
3823 
3824 #ifdef _WIN64
3825     // restore xmm regs belonging to calling function
3826     for (int i = 6; i <= XMM_REG_LAST; i++) {
3827       __ movdqu(as_XMMRegister(i), xmm_save(i));
3828     }
3829 #endif
3830     __ leave();
3831     __ ret(0);
3832     return start;
3833   }
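
For reference, a bitwise C++ model of what this stub computes, following the GF(2^128) multiplication algorithm in the GCM specification (NIST SP 800-38D): state = (state ^ data_i) * H for each 16-byte block. The sketch treats state and subkeyH as blocks in the specification's byte order; the real stub additionally compensates for the Java long[] layout of those arguments, which is what the swap masks above are for. The pclmulqdq path reaches the same result with four carry-less multiplies and a two-phase reduction instead of 128 shift-and-xor steps.

#include <stddef.h>
#include <stdint.h>

static uint64_t load_be64(const uint8_t* p) {
  uint64_t v = 0;
  for (int i = 0; i < 8; i++) v = (v << 8) | p[i];
  return v;
}

static void store_be64(uint8_t* p, uint64_t v) {
  for (int i = 7; i >= 0; i--) { p[i] = (uint8_t)v; v >>= 8; }
}

// Z = X * Y in GF(2^128), bit by bit, per the GCM spec: V starts as Y and is
// shifted right once per bit of X, reduced with R = 11100001 || 0^120 on carry.
static void gf128_mul(uint64_t xh, uint64_t xl, uint64_t yh, uint64_t yl,
                      uint64_t* zh, uint64_t* zl) {
  uint64_t rh = 0, rl = 0;
  for (int i = 0; i < 128; i++) {
    // Bit i of X; bit 0 is the most significant bit of byte 0.
    uint64_t xbit = (i < 64) ? (xh >> (63 - i)) & 1 : (xl >> (127 - i)) & 1;
    if (xbit) { rh ^= yh; rl ^= yl; }
    uint64_t carry = yl & 1;
    yl = (yl >> 1) | (yh << 63);
    yh >>= 1;
    if (carry) yh ^= 0xE100000000000000ULL;
  }
  *zh = rh; *zl = rl;
}

// state <- GHASH: state = (state ^ data_i) * H over `blocks` 16-byte blocks.
void ghash_process_blocks_model(uint8_t state[16], const uint8_t subkeyH[16],
                                const uint8_t* data, size_t blocks) {
  uint64_t sh = load_be64(state),   sl = load_be64(state + 8);
  uint64_t hh = load_be64(subkeyH), hl = load_be64(subkeyH + 8);
  for (size_t i = 0; i < blocks; i++, data += 16) {
    sh ^= load_be64(data);
    sl ^= load_be64(data + 8);
    gf128_mul(sh, sl, hh, hl, &sh, &sl);
  }
  store_be64(state, sh);
  store_be64(state + 8, sl);
}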
3834 
3835   /**
3836    *  Arguments:
3837    *
3838    * Inputs:
3839    *   c_rarg0   - int crc
3840    *   c_rarg1   - byte* buf
3841    *   c_rarg2   - int length
3842    *
3843    * Output:
3844    *       rax   - int crc result
3845    */
3846   address generate_updateBytesCRC32() {
3847     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
3848 
3849     __ align(CodeEntryAlignment);
3850     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
3851 
3852     address start = __ pc();
3853     // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
3854     // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
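
The excerpt is cut here; the omitted body folds the buffer 128 bits at a time with CLMUL against precomputed constants. The value it must produce is the ordinary java.util.zip.CRC32 (zlib) checksum. A bitwise model, assuming the ~crc pre/post conditioning of the zlib formulation is performed inside the stub:

#include <stddef.h>
#include <stdint.h>

// Reflected CRC-32, polynomial 0xEDB88320 (java.util.zip.CRC32 / zlib).
uint32_t crc32_model(uint32_t crc, const uint8_t* buf, size_t len) {
  crc = ~crc;                                              // pre-conditioning
  for (size_t i = 0; i < len; i++) {
    crc ^= buf[i];
    for (int b = 0; b < 8; b++)
      crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
  }
  return ~crc;                                             // post-conditioning
}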


4151     StubRoutines::x86::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
4152     StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
4153     StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
4154 
4155     // support for verify_oop (must happen after universe_init)
4156     StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
4157 
4158     // arraycopy stubs used by compilers
4159     generate_arraycopy_stubs();
4160 
4161     generate_math_stubs();
4162 
4163     // don't bother generating these AES intrinsic stubs unless global flag is set
4164     if (UseAESIntrinsics) {
4165       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
4166 
4167       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
4168       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
4169       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
4170       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
4171     }
4172 
4173     // Generate GHASH intrinsics code
4174     if (UseGHASHIntrinsics) {
4175       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
4176       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
4177       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
4178     }
4179 
4180     // Safefetch stubs.
4181     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
4182                                                        &StubRoutines::_safefetch32_fault_pc,
4183                                                        &StubRoutines::_safefetch32_continuation_pc);
4184     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
4185                                                        &StubRoutines::_safefetchN_fault_pc,
4186                                                        &StubRoutines::_safefetchN_continuation_pc);
4187 #ifdef COMPILER2
4188     if (UseMultiplyToLenIntrinsic) {
4189       StubRoutines::_multiplyToLen = generate_multiplyToLen();
4190     }
4191 #endif
4192   }
4193 
4194  public:
4195   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
4196     if (all) {
4197       generate_all();