
src/cpu/x86/vm/stubGenerator_x86_32.cpp



   1 /*
   2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2701       __ aesdec(xmm_result, as_XMMRegister(rnum));
2702     }
2703     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2704       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705     }
2706     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2707     __ aesdeclast(xmm_result, xmm_temp);
2708     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2710     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2711     // no need to store r to memory until we exit
2712     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2713     __ addptr(pos, AESBlockSize);
2714     __ subptr(len_reg, AESBlockSize);
2715     __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
2716     __ jmp(L_exit);
2717 
2718     return start;
2719   }
2720 
2721   /**
2722    *  Arguments:
2723    *
2724    * Inputs:
2725    *   rsp(4)   - int crc
2726    *   rsp(8)   - byte* buf
2727    *   rsp(12)  - int length
2728    *
2729    * Output:
2730    *       rax   - int crc result
2731    */
2732   address generate_updateBytesCRC32() {
2733     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2734 
2735     __ align(CodeEntryAlignment);
2736     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2737 
2738     address start = __ pc();
2739 
2740     const Register crc   = rdx;  // crc


2998 
2999     //------------------------------------------------------------------------------------------------------------------------
3000     // entry points that are platform specific
3001 
3002     // support for verify_oop (must happen after universe_init)
3003     StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
3004 
3005     // arraycopy stubs used by compilers
3006     generate_arraycopy_stubs();
3007 
3008     generate_math_stubs();
3009 
3010     // don't bother generating these AES intrinsic stubs unless global flag is set
3011     if (UseAESIntrinsics) {
3012       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
3013 
3014       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3015       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3016       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3017       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3018     }
3019 
3020     // Safefetch stubs.
3021     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3022                                                    &StubRoutines::_safefetch32_fault_pc,
3023                                                    &StubRoutines::_safefetch32_continuation_pc);
3024     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
3025     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
3026     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3027   }
3028 
3029 
3030  public:
3031   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3032     if (all) {
3033       generate_all();
3034     } else {
3035       generate_initial();
3036     }
3037   }
   1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


2701       __ aesdec(xmm_result, as_XMMRegister(rnum));
2702     }
2703     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2704       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2705     }
2706     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2707     __ aesdeclast(xmm_result, xmm_temp);
2708     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2709     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2710     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2711     // no need to store r to memory until we exit
2712     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2713     __ addptr(pos, AESBlockSize);
2714     __ subptr(len_reg, AESBlockSize);
2715     __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
2716     __ jmp(L_exit);
2717 
2718     return start;
2719   }
2720 
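The tail loop above is the standard CBC decryption recurrence: each plaintext block is the AES decryption of the current ciphertext block XORed with the previous ciphertext block (the "r vector", initially the IV), which is why prev_block_cipher_ptr is simply re-pointed into the source buffer instead of copying the block. A minimal scalar sketch of that chaining, with a hypothetical aes_decrypt_block() standing in for the aesdec/aesdeclast sequence (illustrative names, not part of this file):

    #include <stdint.h>
    #include <string.h>

    // Hypothetical single-block decryptor; the chaining logic is the point here.
    void aes_decrypt_block(const uint8_t* expanded_key, uint8_t block[16]);

    void cbc_decrypt(const uint8_t* expanded_key, const uint8_t iv[16],
                     const uint8_t* from, uint8_t* to, size_t len) {
      const uint8_t* prev = iv;                  // r vector starts as the IV
      for (size_t pos = 0; pos < len; pos += 16) {
        uint8_t blk[16];
        memcpy(blk, from + pos, 16);
        aes_decrypt_block(expanded_key, blk);    // D_K(C[i])
        for (int i = 0; i < 16; i++)
          to[pos + i] = blk[i] ^ prev[i];        // XOR with previous ciphertext
        prev = from + pos;                       // re-point, like the stub's lea
      }
    }

This sketch assumes from and to do not overlap.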
2721   // shuffle mask: swaps the two 64-bit (long) halves of an xmm register
2722   address generate_ghash_long_swap_mask() {
2723     __ align(CodeEntryAlignment);
2724     StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
2725     address start = __ pc();
2726     __ emit_data(0x0b0a0908, relocInfo::none, 0);
2727     __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
2728     __ emit_data(0x03020100, relocInfo::none, 0);
2729     __ emit_data(0x07060504, relocInfo::none, 0);
2730 
2731     return start;
2732   }
2733 
2734   // shuffle mask: reverses all 16 bytes of an xmm register (byte array swap)
2735   address generate_ghash_byte_swap_mask() {
2736     __ align(CodeEntryAlignment);
2737     StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
2738     address start = __ pc();
2739     __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
2740     __ emit_data(0x08090a0b, relocInfo::none, 0);
2741     __ emit_data(0x04050607, relocInfo::none, 0);
2742     __ emit_data(0x00010203, relocInfo::none, 0);
2743     return start;
2744   }
2745 
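Both stubs just emit 16-byte pshufb masks. pshufb replaces each byte of the destination register with the source byte selected by the low four bits of the corresponding mask byte (or zero when the mask byte's top bit is set), and emit_data stores each dword little-endian, so the long-swap mask is the byte sequence 8,9,...,15,0,1,...,7 (exchanging the two 64-bit halves) while the byte-swap mask is 15,14,...,0 (reversing the whole register). A scalar model of the shuffle, with illustrative names:

    #include <stdint.h>
    #include <string.h>

    // Scalar model of pshufb dst, mask (SSSE3): each destination byte picks a
    // source byte by index, or is zeroed when the mask byte's top bit is set.
    static void pshufb_model(uint8_t dst[16], const uint8_t mask[16]) {
      uint8_t src[16];
      memcpy(src, dst, 16);
      for (int i = 0; i < 16; i++)
        dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
    }

    static const uint8_t long_swap_mask[16] =   // swaps the 64-bit halves
      { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
    static const uint8_t byte_swap_mask[16] =   // reverses all 16 bytes
      { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };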
2746   /* Single and multi-block ghash operations */
2747   address generate_ghash_processBlocks() {
2748     assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
2749     __ align(CodeEntryAlignment);
2750     Label L_ghash_loop, L_exit;
2751     StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
2752     address start = __ pc();
2753 
2754     const Register state        = rdi;
2755     const Register subkeyH      = rsi;
2756     const Register data         = rdx;
2757     const Register blocks       = rcx;
2758 
2759     const Address  state_param(rbp, 8+0);
2760     const Address  subkeyH_param(rbp, 8+4);
2761     const Address  data_param(rbp, 8+8);
2762     const Address  blocks_param(rbp, 8+12);
2763 
2764 #ifdef _WIN64
2765     const int XMM_REG_LAST  = 7;
2766 #endif
2767 
2768     const XMMRegister xmm_temp0 = xmm0;
2769     const XMMRegister xmm_temp1 = xmm1;
2770     const XMMRegister xmm_temp2 = xmm2;
2771     const XMMRegister xmm_temp3 = xmm3;
2772     const XMMRegister xmm_temp4 = xmm4;
2773     const XMMRegister xmm_temp5 = xmm5;
2774     const XMMRegister xmm_temp6 = xmm6;
2775     const XMMRegister xmm_temp7 = xmm7;
2776 
2777     __ enter();
2778 
2779 #ifdef _WIN64
2780     // save the xmm registers which must be preserved 6-7
2781     __ subptr(rsp, -rsp_after_call_off * wordSize);
2782     for (int i = 6; i <= XMM_REG_LAST; i++) {
2783       __ movdqu(xmm_save(i), as_XMMRegister(i));
2784     }
2785 #endif
2786     __ movptr(state, state_param);
2787     __ movptr(subkeyH, subkeyH_param);
2788     __ movptr(data, data_param);
2789     __ movptr(blocks, blocks_param);
2790 
2791     __ movdqu(xmm_temp0, Address(state, 0));
2792     __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2793   
2794     __ movdqu(xmm_temp1, Address(subkeyH, 0));
2795     __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2796 
2797     __ BIND(L_ghash_loop);
2798     __ movdqu(xmm_temp2, Address(data, 0));
2799     __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
2800 
2801     __ pxor(xmm_temp0, xmm_temp2); 
2802 
2803     //
2804     // Multiply with the hash key
2805     //
2806     __ movdqu(xmm_temp3, xmm_temp0);
2807     __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
2808     __ movdqu(xmm_temp4, xmm_temp0);
2809     __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
2810 
2811     __ movdqu(xmm_temp5, xmm_temp0);
2812     __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
2813     __ movdqu(xmm_temp6, xmm_temp0);
2814     __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
2815 
2816     __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
2817 
2818     __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
2819     __ psrldq(xmm_temp4, 8);    // shift xmm4 right by 64 bits
2820     __ pslldq(xmm_temp5, 8);    // shift xmm5 left by 64 bits
2821     __ pxor(xmm_temp3, xmm_temp5);
2822     __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
2823                                 // of the carry-less multiplication of
2824                                 // xmm0 by xmm1.
2825 
2826     // We shift the result of the multiplication by one bit position
2827     // to the left to account for the fact that the bits are reversed.
2828     __ movdqu(xmm_temp7, xmm_temp3);
2829     __ movdqu(xmm_temp4, xmm_temp6);
2830     __ pslld(xmm_temp3, 1);
2831     __ pslld(xmm_temp6, 1);
2832     __ psrld(xmm_temp7, 31);
2833     __ psrld(xmm_temp4, 31);
2834     __ movdqu(xmm_temp5, xmm_temp7);
2835     __ pslldq(xmm_temp4, 4);
2836     __ pslldq(xmm_temp7, 4);
2837     __ psrldq(xmm_temp5, 12);
2838     __ por(xmm_temp3, xmm_temp7);
2839     __ por(xmm_temp6, xmm_temp4);
2840     __ por(xmm_temp6, xmm_temp5);
2841 
2842     //
2843     // First phase of the reduction
2844     //
2845     // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
2846     // independently.
2847     __ movdqu(xmm_temp7, xmm_temp3);
2848     __ movdqu(xmm_temp4, xmm_temp3);
2849     __ movdqu(xmm_temp5, xmm_temp3);
2850     __ pslld(xmm_temp7, 31);    // packed left shift by 31
2851     __ pslld(xmm_temp4, 30);    // packed left shift by 30
2852     __ pslld(xmm_temp5, 25);    // packed left shift by 25
2853     __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
2854     __ pxor(xmm_temp7, xmm_temp5);
2855     __ movdqu(xmm_temp4, xmm_temp7);
2856     __ pslldq(xmm_temp7, 12);
2857     __ psrldq(xmm_temp4, 4);
2858     __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
2859 
2860     //
2861     // Second phase of the reduction
2862     //
2863     // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
2864     // shift operations.
2865     __ movdqu(xmm_temp2, xmm_temp3);
2866     __ movdqu(xmm_temp7, xmm_temp3);
2867     __ movdqu(xmm_temp5, xmm_temp3);
2868     __ psrld(xmm_temp2, 1);     // packed right shift by 1
2869     __ psrld(xmm_temp7, 2);     // packed right shift by 2
2870     __ psrld(xmm_temp5, 7);     // packed right shift by 7
2871     __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
2872     __ pxor(xmm_temp2, xmm_temp5);
2873     __ pxor(xmm_temp2, xmm_temp4);
2874     __ pxor(xmm_temp3, xmm_temp2);
2875     __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
2876 
2877     __ decrement(blocks);
2878     __ jcc(Assembler::zero, L_exit);
2879     __ movdqu(xmm_temp0, xmm_temp6);
2880     __ addptr(data, 16);
2881     __ jmp(L_ghash_loop);
2882 
2883     __ BIND(L_exit);
2884     // Byte swap 16-byte result
2885     __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
2886     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
2887 
2888 #ifdef _WIN64
2889     // restore xmm regs belonging to calling function
2890     for (int i = 6; i <= XMM_REG_LAST; i++) {
2891       __ movdqu(as_XMMRegister(i), xmm_save(i));
2892     }
2893 #endif
2894     __ leave();
2895     __ ret(0);
2896     return start;
2897   }
2898 
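For reference, the four pclmulqdq instructions above build the full 256-bit carry-less product schoolbook-style: with the 128-bit operands split as a = a1:a0 and b = b1:b0, the stub computes a0*b0 (xmm3), a1*b1 (xmm6), and the cross terms a0*b1 ^ a1*b0, then folds the cross terms into the middle 128 bits before the bit-reflection shift and the two-phase reduction modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1. A scalar sketch of the multiply step (reduction omitted; names are illustrative):

    #include <stdint.h>

    // Carry-less (GF(2)[x]) multiply of two 64-bit values -> 128-bit product;
    // the scalar equivalent of one pclmulqdq.
    static void clmul64(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi) {
      *lo = *hi = 0;
      for (int i = 0; i < 64; i++) {
        if ((b >> i) & 1) {
          *lo ^= a << i;
          if (i != 0) *hi ^= a >> (64 - i);
        }
      }
    }

    // 128x128 -> 256-bit carry-less product, mirroring the xmm3..xmm6 dataflow.
    static void clmul128(const uint64_t a[2], const uint64_t b[2], uint64_t r[4]) {
      uint64_t m0, m1, n0, n1;
      clmul64(a[0], b[0], &r[0], &r[1]);   // xmm3: a0*b0
      clmul64(a[1], b[1], &r[2], &r[3]);   // xmm6: a1*b1
      clmul64(a[0], b[1], &m0, &m1);       // xmm4: a0*b1
      clmul64(a[1], b[0], &n0, &n1);       // xmm5: a1*b0
      m0 ^= n0; m1 ^= n1;                  // combined cross terms
      r[1] ^= m0;                          // pslldq by 8: low half into bits 64..127
      r[2] ^= m1;                          // psrldq by 8: high half into bits 128..191
    }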
2899   /**
2900    *  Arguments:
2901    *
2902    * Inputs:
2903    *   rsp(4)   - int crc
2904    *   rsp(8)   - byte* buf
2905    *   rsp(12)  - int length
2906    *
2907    * Output:
2908    *       rax   - int crc result
2909    */
2910   address generate_updateBytesCRC32() {
2911     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
2912 
2913     __ align(CodeEntryAlignment);
2914     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
2915 
2916     address start = __ pc();
2917 
2918     const Register crc   = rdx;  // crc
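
For the value this stub has to produce: java.util.zip.CRC32 computes the standard reflected CRC-32 (polynomial 0xEDB88320), including the inversion of the accumulator on entry and exit. A minimal bit-at-a-time reference follows; per the assert above, the stub itself uses a CLMUL-based implementation, which is equivalent but far faster.

    #include <stdint.h>

    // Bit-at-a-time reference for the zlib/java.util.zip CRC-32.
    uint32_t crc32_ref(uint32_t crc, const uint8_t* buf, int len) {
      crc = ~crc;                                  // pre-conditioning
      for (int i = 0; i < len; i++) {
        crc ^= buf[i];
        for (int b = 0; b < 8; b++)
          crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));
      }
      return ~crc;                                 // post-conditioning
    }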


3176 
3177     //------------------------------------------------------------------------------------------------------------------------
3178     // entry points that are platform specific
3179 
3180     // support for verify_oop (must happen after universe_init)
3181     StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
3182 
3183     // arraycopy stubs used by compilers
3184     generate_arraycopy_stubs();
3185 
3186     generate_math_stubs();
3187 
3188     // don't bother generating these AES intrinsic stubs unless global flag is set
3189     if (UseAESIntrinsics) {
3190       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
3191 
3192       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3193       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3194       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
3195       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
3196     }
3197 
3198     // Generate GHASH intrinsics code
3199     if (UseGHASHIntrinsics) {
3200       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
3201       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
3202       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
3203     }
3204 
3205     // Safefetch stubs.
3206     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3207                                                    &StubRoutines::_safefetch32_fault_pc,
3208                                                    &StubRoutines::_safefetch32_continuation_pc);
3209     StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
3210     StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
3211     StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
3212   }
3213 
3214 
3215  public:
3216   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3217     if (all) {
3218       generate_all();
3219     } else {
3220       generate_initial();
3221     }
3222   }