
src/cpu/x86/vm/stubGenerator_x86_32.cpp

      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_singleBlock_loopTop_256);
    __ jmp(L_exit);

    return start;
  }

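  // Reading aid, not part of the generated stub: a minimal sketch of the
  // CBC-decrypt chaining rule the loop above implements,
  // P[i] = D(C[i]) ^ C[i-1], with C[-1] being the initial r vector (IV).
  // The helper name and the block-decrypt callback (which stands in for the
  // AESDEC/AESDECLAST rounds) are illustrative only.
  static void cbc_decrypt_model(void (*decrypt_block)(unsigned char out[16],
                                                      const unsigned char in[16]),
                                unsigned char* out, const unsigned char* in,
                                const unsigned char iv[16], int blocks) {
    const unsigned char* prev = iv;            // plays the role of prev_block_cipher_ptr
    for (int n = 0; n < blocks; n++) {
      unsigned char tmp[16];
      decrypt_block(tmp, in + 16 * n);         // raw AES block decryption
      for (int i = 0; i < 16; i++) {
        out[16 * n + i] = tmp[i] ^ prev[i];    // xor with the previous ciphertext block
      }
      prev = in + 16 * n;                      // chain on the ciphertext, not the plaintext
    }
  }
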
  // Mask that swaps the two 64-bit halves of a 128-bit value
  // (byte order within each half is preserved)
  address generate_ghash_long_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
    address start = __ pc();
    __ emit_data(0x0b0a0908, relocInfo::none, 0);
    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
    __ emit_data(0x03020100, relocInfo::none, 0);
    __ emit_data(0x07060504, relocInfo::none, 0);

    return start;
  }

  // Mask that reverses all 16 bytes of a 128-bit value,
  // for loads from a big-endian byte array
  address generate_ghash_byte_swap_mask() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
    address start = __ pc();
    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
    __ emit_data(0x08090a0b, relocInfo::none, 0);
    __ emit_data(0x04050607, relocInfo::none, 0);
    __ emit_data(0x00010203, relocInfo::none, 0);
    return start;
  }

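  // Reading aid, not part of the generated stubs: a scalar model of what
  // PSHUFB does with the two constants above. The helper name is
  // illustrative only. In x86 little-endian memory order the emitted bytes
  // are:
  //   long_swap_mask: 08 09 0a 0b 0c 0d 0e 0f 00 01 02 03 04 05 06 07
  //   byte_swap_mask: 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
  // so the long-swap mask exchanges the two 8-byte halves of a 128-bit
  // value, and the byte-swap mask reverses all 16 bytes.
  static void pshufb_model(unsigned char dst[16], const unsigned char src[16],
                           const unsigned char mask[16]) {
    for (int i = 0; i < 16; i++) {
      // Each mask byte selects a source byte; a set high bit would zero
      // the destination byte, which these masks never use.
      dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
    }
  }
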
  /* Single and multi-block ghash operations */
  address generate_ghash_processBlocks() {
    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
    __ align(CodeEntryAlignment);
    Label L_ghash_loop, L_exit;
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
    address start = __ pc();

    const Register state        = rdi;
    const Register subkeyH      = rsi;
    const Register data         = rdx;
    const Register blocks       = rcx;

    const Address  state_param(rbp, 8+0);
    const Address  subkeyH_param(rbp, 8+4);
    const Address  data_param(rbp, 8+8);
    const Address  blocks_param(rbp, 8+12);

    const XMMRegister xmm_temp0 = xmm0;
    const XMMRegister xmm_temp1 = xmm1;
    const XMMRegister xmm_temp2 = xmm2;
    const XMMRegister xmm_temp3 = xmm3;
    const XMMRegister xmm_temp4 = xmm4;
    const XMMRegister xmm_temp5 = xmm5;
    const XMMRegister xmm_temp6 = xmm6;
    const XMMRegister xmm_temp7 = xmm7;

    __ enter();

    __ movptr(state, state_param);
    __ movptr(subkeyH, subkeyH_param);
    __ movptr(data, data_param);
    __ movptr(blocks, blocks_param);

    __ movdqu(xmm_temp0, Address(state, 0));
    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ movdqu(xmm_temp1, Address(subkeyH, 0));
    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));

    __ BIND(L_ghash_loop);
    __ movdqu(xmm_temp2, Address(data, 0));
    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));

    __ pxor(xmm_temp0, xmm_temp2);

    //
    // Multiply with the hash key
    //
    __ movdqu(xmm_temp3, xmm_temp0);
    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
    __ movdqu(xmm_temp4, xmm_temp0);
    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1

    __ movdqu(xmm_temp5, xmm_temp0);
    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
    __ movdqu(xmm_temp6, xmm_temp0);
    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1

    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0

    __ movdqu(xmm_temp5, xmm_temp4);    // copy the contents of xmm4 to xmm5
    __ psrldq(xmm_temp4, 8);            // shift xmm4 right by 64 bits
    __ pslldq(xmm_temp5, 8);            // shift xmm5 left by 64 bits
    __ pxor(xmm_temp3, xmm_temp5);
    __ pxor(xmm_temp6, xmm_temp4);      // register pair <xmm6:xmm3> holds the
                                        // result of the carry-less
                                        // multiplication of xmm0 by xmm1

    // We shift the result of the multiplication by one bit position
    // to the left to compensate for the fact that the bits are reversed.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp6);
    __ pslld(xmm_temp3, 1);
    __ pslld(xmm_temp6, 1);
    __ psrld(xmm_temp7, 31);
    __ psrld(xmm_temp4, 31);
    __ movdqu(xmm_temp5, xmm_temp7);
    __ pslldq(xmm_temp4, 4);
    __ pslldq(xmm_temp7, 4);
    __ psrldq(xmm_temp5, 12);
    __ por(xmm_temp3, xmm_temp7);
    __ por(xmm_temp6, xmm_temp4);
    __ por(xmm_temp6, xmm_temp5);

    //
    // First phase of the reduction
    //
    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
    // independently.
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp4, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ pslld(xmm_temp7, 31);    // packed left shift by 31
    __ pslld(xmm_temp4, 30);    // packed left shift by 30
    __ pslld(xmm_temp5, 25);    // packed left shift by 25
    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
    __ pxor(xmm_temp7, xmm_temp5);
    __ movdqu(xmm_temp4, xmm_temp7);
    __ pslldq(xmm_temp7, 12);
    __ psrldq(xmm_temp4, 4);
    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete

    //
    // Second phase of the reduction
    //
    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
    // shift operations.
    __ movdqu(xmm_temp2, xmm_temp3);
    __ movdqu(xmm_temp7, xmm_temp3);
    __ movdqu(xmm_temp5, xmm_temp3);
    __ psrld(xmm_temp2, 1);     // packed right shift by 1
    __ psrld(xmm_temp7, 2);     // packed right shift by 2
    __ psrld(xmm_temp5, 7);     // packed right shift by 7
    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
    __ pxor(xmm_temp2, xmm_temp5);
    __ pxor(xmm_temp2, xmm_temp4);
    __ pxor(xmm_temp3, xmm_temp2);
    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6

    __ decrement(blocks);
    __ jcc(Assembler::zero, L_exit);
    __ movdqu(xmm_temp0, xmm_temp6);
    __ addptr(data, 16);
    __ jmp(L_ghash_loop);

    __ BIND(L_exit);
    // Byte swap the 16-byte result
    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
    __ movdqu(Address(state, 0), xmm_temp6);   // store the result

    __ leave();
    __ ret(0);
    return start;
  }

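  // Reading aid, not part of the generated stub: a bit-at-a-time reference
  // for the GHASH step that the CLMUL code above computes, assuming the
  // NIST SP 800-38D convention with state, subkey and data treated as
  // big-endian 128-bit field elements (the stub handles the corresponding
  // byte/word reordering with the pshufb masks). All names here are
  // illustrative only.
  struct ghash_u128 { unsigned long long hi, lo; };

  // z = x * h in GF(2^128) with the polynomial x^128 + x^7 + x^2 + x + 1,
  // using the reflected (right-shift) formulation from SP 800-38D.
  static ghash_u128 ghash_gf128_mul(ghash_u128 x, ghash_u128 h) {
    ghash_u128 z = { 0, 0 };
    for (int i = 0; i < 128; i++) {
      unsigned long long bit =
        (i < 64) ? (x.hi >> (63 - i)) & 1 : (x.lo >> (127 - i)) & 1;
      if (bit) { z.hi ^= h.hi; z.lo ^= h.lo; }
      unsigned long long carry = h.lo & 1;       // bit falling off the low end
      h.lo = (h.lo >> 1) | (h.hi << 63);         // h = h * x (reflected order)
      h.hi >>= 1;
      if (carry) h.hi ^= 0xe100000000000000ULL;  // reflected reduction constant
    }
    return z;
  }

  // One GHASH step per 16-byte block: state = (state ^ block) * H.
  static void ghash_reference(ghash_u128* state, ghash_u128 h,
                              const ghash_u128* data, int blocks) {
    for (int n = 0; n < blocks; n++) {
      state->hi ^= data[n].hi;
      state->lo ^= data[n].lo;
      *state = ghash_gf128_mul(*state, h);
    }
  }
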
  /**
   * Arguments:
   *
   * Inputs:
   *   rsp(4)   - int crc
   *   rsp(8)   - byte* buf
   *   rsp(12)  - int length
   *
   * Output:
   *   rax      - int crc result
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();

    const Register crc   = rdx;  // crc
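
  // Reading aid, not part of the generated stub (whose body is elided in
  // this view): a bit-at-a-time reference for the reflected CRC-32
  // (polynomial 0xEDB88320) that java.util.zip.CRC32 defines and that the
  // CLMUL fast path must match. The helper name is illustrative only.
  static unsigned int crc32_reference(unsigned int crc,
                                      const unsigned char* buf, int length) {
    crc = ~crc;                                  // CRC32 pre-inverts the state
    for (int i = 0; i < length; i++) {
      crc ^= buf[i];
      for (int b = 0; b < 8; b++) {
        // Shift one bit out; xor in the reflected polynomial if it was set.
        crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));
      }
    }
    return ~crc;                                 // ...and post-inverts the result
  }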



    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    generate_math_stubs();

    // don't bother generating these AES intrinsic stubs unless the global flag is set
    if (UseAESIntrinsics) {
      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others

      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
    }

    // Generate GHASH intrinsics code
    if (UseGHASHIntrinsics) {
      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
    }

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                   &StubRoutines::_safefetch32_fault_pc,
                                                   &StubRoutines::_safefetch32_continuation_pc);
    StubRoutines::_safefetchN_entry           = StubRoutines::_safefetch32_entry;
    StubRoutines::_safefetchN_fault_pc        = StubRoutines::_safefetch32_fault_pc;
    StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
  }


 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }