src/cpu/x86/vm/stubGenerator_x86_32.cpp

   1 /*
   2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2701       __ aesdec(xmm_result, as_XMMRegister(rnum));
2702     }
2703     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2704       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705     }
2706     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2707     __ aesdeclast(xmm_result, xmm_temp);
2708     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2710     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2711     // no need to store r to memory until we exit
2712     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2713     __ addptr(pos, AESBlockSize);
2714     __ subptr(len_reg, AESBlockSize);
2715     __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
2716     __ jmp(L_exit);
2717 
2718     return start;
2719   }
2720 
2721   /**
2722    *  Arguments:
2723    *
2724    * Inputs:
2725    *   rsp(4)   - int crc
2726    *   rsp(8)   - byte* buf
2727    *   rsp(12)  - int length
2728    *
2729    * Output:
2730    *       rax   - int crc result
2731    */
2732   address generate_updateBytesCRC32() {
2733     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2734 
2735     __ align(CodeEntryAlignment);
2736     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2737 
2738     address start = __ pc();
2739 
2740     const Register crc   = rdx;  // crc


2998 
2999     //------------------------------------------------------------------------------------------------------------------------
3000     // entry points that are platform specific
3001 
3002     // support for verify_oop (must happen after universe_init)
3003     StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
3004 
3005     // arraycopy stubs used by compilers
3006     generate_arraycopy_stubs();
3007 
3008     generate_math_stubs();
3009 
3010     // don't bother generating these AES intrinsic stubs unless global flag is set
3011     if (UseAESIntrinsics) {
3012       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
3013 
3014       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3015       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3016       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3017       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3018     }
3019 
3020     // Safefetch stubs.
3021     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3022                                                    &StubRoutines::_safefetch32_fault_pc,
3023                                                    &StubRoutines::_safefetch32_continuation_pc);
3024     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
3025     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
3026     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3027   }
3028 
3029 
3030  public:
3031   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3032     if (all) {
3033       generate_all();
3034     } else {
3035       generate_initial();
3036     }
3037   }
   1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2701       __ aesdec(xmm_result, as_XMMRegister(rnum));
2702     }
2703     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2704       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705     }
2706     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2707     __ aesdeclast(xmm_result, xmm_temp);
2708     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2710     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2711     // no need to store r to memory until we exit
2712     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2713     __ addptr(pos, AESBlockSize);
2714     __ subptr(len_reg, AESBlockSize);
2715     __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
2716     __ jmp(L_exit);
2717 
2718     return start;
2719   }
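
The fragment above is the tail of the 256-bit key path of the CBC-decrypt stub; per 16-byte block it computes the standard CBC recurrence P[i] = D_K(C[i]) ^ C[i-1], where C[-1] is the IV carried in the r vector. A minimal scalar sketch of that recurrence, assuming a hypothetical aes256_decrypt_block() helper standing in for the AESDEC round sequence this stub emits:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical stand-in for the stub's AESDEC/AESDECLAST round sequence.
void aes256_decrypt_block(const uint8_t in[16], uint8_t out[16],
                          const uint8_t key_schedule[15][16]);

// Illustrative scalar model of the CBC-decrypt loop above
// (len is a multiple of the 16-byte AES block size).
void cbc_decrypt(const uint8_t* in, uint8_t* out, size_t len,
                 const uint8_t key_schedule[15][16], uint8_t rvec[16]) {
  uint8_t prev[16], tmp[16];
  memcpy(prev, rvec, 16);                    // r vector starts as the IV
  for (size_t pos = 0; pos < len; pos += 16) {
    aes256_decrypt_block(in + pos, tmp, key_schedule);
    for (int i = 0; i < 16; i++)
      out[pos + i] = tmp[i] ^ prev[i];       // xor with previous cipher block
    memcpy(prev, in + pos, 16);              // ciphertext becomes the next r
  }
  memcpy(rvec, prev, 16);                    // store r back only on exit
}
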
2720 
2721   // Mask used with pshufb to swap the two 64-bit halves ("longs") of an XMM register
2722   address generate_ghash_long_swap_mask() {
2723     __ align(CodeEntryAlignment);
2724     StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
2725     address start = __ pc();
2726     __ emit_data(0x0b0a0908, relocInfo::none, 0);
2727     __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
2728     __ emit_data(0x03020100, relocInfo::none, 0);
2729     __ emit_data(0x07060504, relocInfo::none, 0);
2730 
2731     return start;
2732   }
2733 
2734   // Mask used with pshufb to reverse all 16 bytes of an XMM register
2735   address generate_ghash_byte_swap_mask() {
2736     __ align(CodeEntryAlignment);
2737     StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
2738     address start = __ pc();
2739     __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
2740     __ emit_data(0x08090a0b, relocInfo::none, 0);
2741     __ emit_data(0x04050607, relocInfo::none, 0);
2742     __ emit_data(0x00010203, relocInfo::none, 0);
2743     return start;
2744   }
2745 
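Both masks above are pshufb operands: each result byte i is fetched from source byte mask[i]. A scalar model (illustrative only, not a HotSpot API) makes their effect explicit:

#include <cstdint>

// Scalar model of pshufb for these masks: dst[i] = src[mask[i]].
// (Real pshufb zeroes a lane when the mask byte's high bit is set;
// these masks never use that.)
void pshufb_model(uint8_t dst[16], const uint8_t src[16],
                  const uint8_t mask[16]) {
  for (int i = 0; i < 16; i++) dst[i] = src[mask[i] & 0x0f];
}

Laid down by the emit_data calls above, the long-swap mask reads 08 09 0a 0b 0c 0d 0e 0f 00 01 02 03 04 05 06 07 in memory, so it swaps the two 64-bit halves of the register; the byte-swap mask reads 0f 0e 0d 0c ... 01 00, reversing all 16 bytes.
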
2746   /* Single and multi-block GHASH operations */
2747   address generate_ghash_processBlocks() {
2748     assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
2749     __ align(CodeEntryAlignment);
2750     Label L_ghash_loop, L_exit;
2751     StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
2752     address start = __ pc();
2753 
2754     const Register state        = rdi;
2755     const Register subkeyH      = rsi;
2756     const Register data         = rdx;
2757     const Register blocks       = rcx;
2758 
2759     const Address  state_param(rbp, 8+0);
2760     const Address  subkeyH_param(rbp, 8+4);
2761     const Address  data_param(rbp, 8+8);
2762     const Address  blocks_param(rbp, 8+12);
2763 
2764     const XMMRegister xmm_temp0 = xmm0;
2765     const XMMRegister xmm_temp1 = xmm1;
2766     const XMMRegister xmm_temp2 = xmm2;
2767     const XMMRegister xmm_temp3 = xmm3;
2768     const XMMRegister xmm_temp4 = xmm4;
2769     const XMMRegister xmm_temp5 = xmm5;
2770     const XMMRegister xmm_temp6 = xmm6;
2771     const XMMRegister xmm_temp7 = xmm7;
2772 
2773     __ enter();
2774 
2775     __ movptr(state, state_param);
2776     __ movptr(subkeyH, subkeyH_param);
2777     __ movptr(data, data_param);
2778     __ movptr(blocks, blocks_param);
2779 
2780     __ movdqu(xmm_temp0, Address(state, 0));
2781     __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2782   
2783     __ movdqu(xmm_temp1, Address(subkeyH, 0));
2784     __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2785 
2786     __ BIND(L_ghash_loop);
2787     __ movdqu(xmm_temp2, Address(data, 0));
2788     __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
2789 
2790     __ pxor(xmm_temp0, xmm_temp2);      // xor the next data block into the hash state
2791 
2792     //
2793     // Multiply with the hash key
2794     //
2795     __ movdqu(xmm_temp3, xmm_temp0);
2796     __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
2797     __ movdqu(xmm_temp4, xmm_temp0);
2798     __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
2799 
2800     __ movdqu(xmm_temp5, xmm_temp0);
2801     __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
2802     __ movdqu(xmm_temp6, xmm_temp0);
2803     __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
2804 
2805     __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
2806 
2807     __ movdqu(xmm_temp5, xmm_temp4);    // copy xmm4 into xmm5
2808     __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 64 bits (8 bytes)
2809     __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 64 bits (8 bytes)
2810     __ pxor(xmm_temp3, xmm_temp5);
2811     __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
2812                                         // of the carry-less multiplication of
2813                                         // xmm0 by xmm1.
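    // (Schoolbook decomposition: with a = a1:a0 and b = b1:b0 as 64-bit halves,
    //  the carry-less product is a1*b1 << 128 ^ (a0*b1 ^ a1*b0) << 64 ^ a0*b0;
    //  the psrldq/pslldq pair above splits the middle term across <xmm6:xmm3>.)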
2814 
2815     // Shift the product left by one bit to compensate for the bit-reflected
2816     // representation, in which the carry-less product comes out shifted right by one.
2817     __ movdqu(xmm_temp7, xmm_temp3);
2818     __ movdqu(xmm_temp4, xmm_temp6);
2819     __ pslld (xmm_temp3, 1);
2820     __ pslld(xmm_temp6, 1);
2821     __ psrld(xmm_temp7, 31);
2822     __ psrld(xmm_temp4, 31);
2823     __ movdqu(xmm_temp5, xmm_temp7);
2824     __ pslldq(xmm_temp4, 4);
2825     __ pslldq(xmm_temp7, 4);
2826     __ psrldq(xmm_temp5, 12);
2827     __ por(xmm_temp3, xmm_temp7);
2828     __ por(xmm_temp6, xmm_temp4);
2829     __ por(xmm_temp6, xmm_temp5);
2830 
2831     //
2832     // First phase of the reduction
2833     //
2834     // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
2835     // independently.
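    // (Both reduction phases fold the 256-bit product back to 128 bits
    //  modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1, operating
    //  on the bit-reflected representation.)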
2836     __ movdqu(xmm_temp7, xmm_temp3);
2837     __ movdqu(xmm_temp4, xmm_temp3);
2838     __ movdqu(xmm_temp5, xmm_temp3);
2839     __ pslld(xmm_temp7, 31);    // packed left shift by 31
2840     __ pslld(xmm_temp4, 30);    // packed left shift by 30
2841     __ pslld(xmm_temp5, 25);    // packed left shift by 25
2842     __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
2843     __ pxor(xmm_temp7, xmm_temp5);
2844     __ movdqu(xmm_temp4, xmm_temp7);
2845     __ pslldq(xmm_temp7, 12);
2846     __ psrldq(xmm_temp4, 4);
2847     __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
2848 
2849     //
2850     // Second phase of the reduction
2851     //
2852     // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
2853     // shift operations.
2854     __ movdqu(xmm_temp2, xmm_temp3);
2855     __ movdqu(xmm_temp7, xmm_temp3);
2856     __ movdqu(xmm_temp5, xmm_temp3);
2857     __ psrld(xmm_temp2, 1);     // packed right shift by 1
2858     __ psrld(xmm_temp7, 2);     // packed right shift by 2
2859     __ psrld(xmm_temp5, 7);     // packed right shift by 7
2860     __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
2861     __ pxor(xmm_temp2, xmm_temp5);
2862     __ pxor(xmm_temp2, xmm_temp4);
2863     __ pxor(xmm_temp3, xmm_temp2);
2864     __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
2865 
2866     __ decrement(blocks);
2867     __ jcc(Assembler::zero, L_exit);
2868     __ movdqu(xmm_temp0, xmm_temp6);
2869     __ addptr(data, 16);
2870     __ jmp(L_ghash_loop);
2871 
2872     __ BIND(L_exit);
2873     // Byte swap the 16-byte result
2874     __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2875     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
2876 
2877     __ leave();
2878     __ ret(0);
2879     return start;
2880   }
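
For readers following the algebra, a compact scalar model of what one call computes is sketched below. It works directly in the NIST bit order (the stub instead byte-reflects its operands with the pshufb masks so it can use pclmulqdq); gf128_mul and ghash_process_blocks are illustrative names, not HotSpot symbols:

#include <cstdint>
#include <cstring>

// GF(2^128) multiply per NIST SP 800-38D: Z = X * H, blocks in NIST order.
static void gf128_mul(uint8_t Z[16], const uint8_t X[16], const uint8_t H[16]) {
  uint8_t V[16], acc[16] = {0};
  memcpy(V, H, 16);
  for (int i = 0; i < 128; i++) {
    if (X[i / 8] & (0x80 >> (i % 8))) {           // bit i of X, MSB first
      for (int j = 0; j < 16; j++) acc[j] ^= V[j];
    }
    int lsb = V[15] & 1;                          // V = V * x (shift right one bit)
    for (int j = 15; j > 0; j--)
      V[j] = (uint8_t)((V[j] >> 1) | (V[j - 1] << 7));
    V[0] >>= 1;
    if (lsb) V[0] ^= 0xe1;                        // reduce by x^128+x^7+x^2+x+1
  }
  memcpy(Z, acc, 16);
}

// Model of ghash_processBlocks(state, subkeyH, data, blocks):
// state = (state ^ block) * H, repeated for each 16-byte block.
static void ghash_process_blocks(uint8_t state[16], const uint8_t subkeyH[16],
                                 const uint8_t* data, int blocks) {
  uint8_t t[16];
  while (blocks-- > 0) {
    for (int j = 0; j < 16; j++) t[j] = state[j] ^ data[j];
    gf128_mul(state, t, subkeyH);
    data += 16;
  }
}
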
2881 
2882   /**
2883    *  Arguments:
2884    *
2885    * Inputs:
2886    *   rsp(4)   - int crc
2887    *   rsp(8)   - byte* buf
2888    *   rsp(12)  - int length
2889    *
2890    * Output:
2891    *       rax   - int crc result
2892    */
2893   address generate_updateBytesCRC32() {
2894     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2895 
2896     __ align(CodeEntryAlignment);
2897     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2898 
2899     address start = __ pc();
2900 
2901     const Register crc   = rdx;  // crc
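
Expressed as a C prototype (an assumption for illustration, not a declaration from this file), the stack layout documented above corresponds to a cdecl call with the result returned in rax:

// Hypothetical C-level view of the stub's contract on x86-32:
// arguments at rsp+4/+8/+12, result in rax.
extern "C" int updateBytesCRC32(int crc, const unsigned char* buf, int length);
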


3159 
3160     //------------------------------------------------------------------------------------------------------------------------
3161     // entry points that are platform specific
3162 
3163     // support for verify_oop (must happen after universe_init)
3164     StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
3165 
3166     // arraycopy stubs used by compilers
3167     generate_arraycopy_stubs();
3168 
3169     generate_math_stubs();
3170 
3171     // don't bother generating these AES intrinsic stubs unless global flag is set
3172     if (UseAESIntrinsics) {
3173       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
3174 
3175       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3176       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3177       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3178       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3179     }
3180 
3181     // Generate GHASH intrinsics code
3182     if (UseGHASHIntrinsics) {
3183       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
3184       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
3185       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
3186     }
3187 
3188     // Safefetch stubs.
3189     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3190                                                    &StubRoutines::_safefetch32_fault_pc,
3191                                                    &StubRoutines::_safefetch32_continuation_pc);
3192     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
3193     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
3194     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3195   }
3196 
3197 
3198  public:
3199   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3200     if (all) {
3201       generate_all();
3202     } else {
3203       generate_initial();
3204     }
3205   }