< prev index next >

src/cpu/x86/vm/stubGenerator_x86_32.cpp

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. --- 1,7 ---- /* ! * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation.
*** 2716,2725 **** --- 2716,2886 ---- __ jmp(L_exit); return start; } + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc(); + __ emit_data(0x0b0a0908, relocInfo::none, 0); + __ emit_data(0x0f0e0d0c, relocInfo::none, 0); + __ emit_data(0x03020100, relocInfo::none, 0); + __ emit_data(0x07060504, relocInfo::none, 0); + + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc(); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); + __ emit_data(0x08090a0b, relocInfo::none, 0); + __ emit_data(0x04050607, relocInfo::none, 0); + __ emit_data(0x00010203, relocInfo::none, 0); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + const Register state = rdi; + const Register subkeyH = rsi; + const Register data = rdx; + const Register blocks = rcx; + + const Address state_param(rbp, 8+0); + const Address subkeyH_param(rbp, 8+4); + const Address data_param(rbp, 8+8); + const Address blocks_param(rbp, 8+12); + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + + __ enter(); + + __ movptr(state, state_param); + __ movptr(subkeyH, subkeyH_param); + __ movptr(data, data_param); + __ movptr(blocks, blocks_param); + + __ movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp6); + __ pslld (xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp4, 31); + __ movdqu(xmm_temp5, xmm_temp7); + __ pslldq(xmm_temp4, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp5, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp4); + __ por(xmm_temp6, xmm_temp5); + + // + // First phase of the reduction + // + // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts + // independently. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 + __ pslld(xmm_temp4, 30); // packed right shift shifting << 30 + __ pslld(xmm_temp5, 25); // packed right shift shifting << 25 + __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp5); + __ movdqu(xmm_temp4, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp4, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these + // shift operations. + __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 + __ psrld(xmm_temp7, 2); // packed left shifting >> 2 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 + __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp4); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + // Byte swap 16-byte result + __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + __ movdqu(Address(state, 0), xmm_temp6); // store the result + + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * * Inputs: * rsp(4) - int crc
*** 3015,3024 **** --- 3176,3192 ---- StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); } + // Generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc); StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
< prev index next >