src/cpu/x86/vm/stubGenerator_x86_32.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8004835 Cdiff src/cpu/x86/vm/stubGenerator_x86_32.cpp

src/cpu/x86/vm/stubGenerator_x86_32.cpp

Print this page

        

*** 2172,2237 ****
    // c_rarg0 - source byte array address
    // c_rarg1 - destination byte array address
    // c_rarg2 - K (key) in little endian int array
    //
    address generate_aescrypt_encryptBlock() {
!     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
      Label L_doLast;
      address start = __ pc();
!     const Register from = rsi; // source array address
      const Register to = rdx; // destination array address
      const Register key = rcx; // key array address
      const Register keylen = rax;
      const Address from_param(rbp, 8+0);
      const Address to_param (rbp, 8+4);
      const Address key_param (rbp, 8+8);
      const XMMRegister xmm_result = xmm0;
!     const XMMRegister xmm_temp = xmm1;
!     const XMMRegister xmm_key_shuf_mask = xmm2;
      __ enter(); // required for proper stackwalking of RuntimeStub frame
!     __ push(rsi);
!     __ movptr(from , from_param);
!     __ movptr(to , to_param);
!     __ movptr(key , key_param);
      __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-     // keylen = # of 32-bit words, convert to 128-bit words
-     __ shrl(keylen, 2);
-     __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
      __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
      __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
      // For encryption, the java expanded key ordering is just what we need
!     load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
!     __ pxor(xmm_result, xmm_temp);
!     for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
!       aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
!     }
!     load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
!     __ cmpl(keylen, 0);
!     __ jcc(Assembler::equal, L_doLast);
!     __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
!     aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
!     load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
!     __ subl(keylen, 2);
!     __ jcc(Assembler::equal, L_doLast);
!     __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
!     aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
!     load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
      __ BIND(L_doLast);
!     __ aesenclast(xmm_result, xmm_temp);
      __ movdqu(Address(to, 0), xmm_result); // store the result
      __ xorptr(rax, rax); // return 0
-     __ pop(rsi);
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
      return start;
    }
--- 2172,2264 ----
    // c_rarg0 - source byte array address
    // c_rarg1 - destination byte array address
    // c_rarg2 - K (key) in little endian int array
    //
    address generate_aescrypt_encryptBlock() {
!     assert(UseAES, "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
      Label L_doLast;
      address start = __ pc();
!     const Register from = rdx; // source array address
      const Register to = rdx; // destination array address
      const Register key = rcx; // key array address
      const Register keylen = rax;
      const Address from_param(rbp, 8+0);
      const Address to_param (rbp, 8+4);
      const Address key_param (rbp, 8+8);
      const XMMRegister xmm_result = xmm0;
!     const XMMRegister xmm_key_shuf_mask = xmm1;
!     const XMMRegister xmm_temp1 = xmm2;
!     const XMMRegister xmm_temp2 = xmm3;
!     const XMMRegister xmm_temp3 = xmm4;
!     const XMMRegister xmm_temp4 = xmm5;
      __ enter(); // required for proper stackwalking of RuntimeStub frame
!     __ movptr(from, from_param);
!     __ movptr(key, key_param);
+     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
      __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
      __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
      __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
+     __ movptr(to, to_param);
      // For encryption, the java expanded key ordering is just what we need
!     load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
!     __ pxor(xmm_result, xmm_temp1);
!
!     load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
!     load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
!     load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
!
!     __ aesenc(xmm_result, xmm_temp1);
!     __ aesenc(xmm_result, xmm_temp2);
!     __ aesenc(xmm_result, xmm_temp3);
!     __ aesenc(xmm_result, xmm_temp4);
!
!     load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
!     load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
!     load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
!
!     __ aesenc(xmm_result, xmm_temp1);
!     __ aesenc(xmm_result, xmm_temp2);
!     __ aesenc(xmm_result, xmm_temp3);
!     __ aesenc(xmm_result, xmm_temp4);
!
!     load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
!
!     __ cmpl(keylen, 44);
!     __ jccb(Assembler::equal, L_doLast);
!
!     __ aesenc(xmm_result, xmm_temp1);
!     __ aesenc(xmm_result, xmm_temp2);
!
!     load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
!
!     __ cmpl(keylen, 52);
!     __ jccb(Assembler::equal, L_doLast);
!
!     __ aesenc(xmm_result, xmm_temp1);
!     __ aesenc(xmm_result, xmm_temp2);
!
!     load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
      __ BIND(L_doLast);
!     __ aesenc(xmm_result, xmm_temp1);
!     __ aesenclast(xmm_result, xmm_temp2);
      __ movdqu(Address(to, 0), xmm_result); // store the result
      __ xorptr(rax, rax); // return 0
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
      return start;
    }
*** 2243,2312 ****
    // c_rarg0 - source byte array address
    // c_rarg1 - destination byte array address
    // c_rarg2 - K (key) in little endian int array
    //
    address generate_aescrypt_decryptBlock() {
!     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
      Label L_doLast;
      address start = __ pc();
!     const Register from = rsi; // source array address
      const Register to = rdx; // destination array address
      const Register key = rcx; // key array address
      const Register keylen = rax;
      const Address from_param(rbp, 8+0);
      const Address to_param (rbp, 8+4);
      const Address key_param (rbp, 8+8);
      const XMMRegister xmm_result = xmm0;
!     const XMMRegister xmm_temp = xmm1;
!     const XMMRegister xmm_key_shuf_mask = xmm2;
      __ enter(); // required for proper stackwalking of RuntimeStub frame
!     __ push(rsi);
!     __ movptr(from , from_param);
!     __ movptr(to , to_param);
!     __ movptr(key , key_param);
      __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-     // keylen = # of 32-bit words, convert to 128-bit words
-     __ shrl(keylen, 2);
-     __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
      __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
      __ movdqu(xmm_result, Address(from, 0));
      // for decryption java expanded key ordering is rotated one position from what we want
      // so we start from 0x10 here and hit 0x00 last
      // we don't know if the key is aligned, hence not using load-execute form
!     load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
!     __ pxor (xmm_result, xmm_temp);
!     for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
!       aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
!     }
!     __ cmpl(keylen, 0);
!     __ jcc(Assembler::equal, L_doLast);
!     // only in 192 and 256 bit keys
!     aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
!     aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
!     __ subl(keylen, 2);
!     __ jcc(Assembler::equal, L_doLast);
!     // only in 256 bit keys
!     aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
!     aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
      __ BIND(L_doLast);
!     // for decryption the aesdeclast operation is always on key+0x00
!     load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
!     __ aesdeclast(xmm_result, xmm_temp);
      __ movdqu(Address(to, 0), xmm_result); // store the result
-     __ xorptr(rax, rax); // return 0
-     __ pop(rsi);
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
      return start;
    }
--- 2270,2364 ----
    // c_rarg0 - source byte array address
    // c_rarg1 - destination byte array address
    // c_rarg2 - K (key) in little endian int array
    //
    address generate_aescrypt_decryptBlock() {
!     assert(UseAES, "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
      Label L_doLast;
      address start = __ pc();
!     const Register from = rdx; // source array address
      const Register to = rdx; // destination array address
      const Register key = rcx; // key array address
      const Register keylen = rax;
      const Address from_param(rbp, 8+0);
      const Address to_param (rbp, 8+4);
      const Address key_param (rbp, 8+8);
      const XMMRegister xmm_result = xmm0;
!     const XMMRegister xmm_key_shuf_mask = xmm1;
!     const XMMRegister xmm_temp1 = xmm2;
!     const XMMRegister xmm_temp2 = xmm3;
!     const XMMRegister xmm_temp3 = xmm4;
!     const XMMRegister xmm_temp4 = xmm5;
      __ enter(); // required for proper stackwalking of RuntimeStub frame
!     __ movptr(from, from_param);
!     __ movptr(key, key_param);
+     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
      __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
      __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
      __ movdqu(xmm_result, Address(from, 0));
+     __ movptr(to, to_param);
      // for decryption java expanded key ordering is rotated one position from what we want
      // so we start from 0x10 here and hit 0x00 last
      // we don't know if the key is aligned, hence not using load-execute form
!     load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
!     load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
!     load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
!
!     __ pxor (xmm_result, xmm_temp1);
!     __ aesdec(xmm_result, xmm_temp2);
!     __ aesdec(xmm_result, xmm_temp3);
!     __ aesdec(xmm_result, xmm_temp4);
!
!     load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
!     load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
!     load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
!
!     __ aesdec(xmm_result, xmm_temp1);
!     __ aesdec(xmm_result, xmm_temp2);
!     __ aesdec(xmm_result, xmm_temp3);
!     __ aesdec(xmm_result, xmm_temp4);
!
!     load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
!     load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
!
!     __ cmpl(keylen, 44);
!     __ jccb(Assembler::equal, L_doLast);
!
!     __ aesdec(xmm_result, xmm_temp1);
!     __ aesdec(xmm_result, xmm_temp2);
!
!     load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
!
!     __ cmpl(keylen, 52);
!     __ jccb(Assembler::equal, L_doLast);
!
!     __ aesdec(xmm_result, xmm_temp1);
!     __ aesdec(xmm_result, xmm_temp2);
!
!     load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
!     load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
      __ BIND(L_doLast);
!     __ aesdec(xmm_result, xmm_temp1);
!     __ aesdec(xmm_result, xmm_temp2);
+     // for decryption the aesdeclast operation is always on key+0x00
+     __ aesdeclast(xmm_result, xmm_temp3);
      __ movdqu(Address(to, 0), xmm_result); // store the result
      __ xorptr(rax, rax); // return 0
      __ leave(); // required for proper stackwalking of RuntimeStub frame
      __ ret(0);
      return start;
    }
*** 2338,2348 ****
    // c_rarg2 - K (key) in little endian int array
    // c_rarg3 - r vector byte array address
    // c_rarg4 - input length
    //
    address generate_cipherBlockChaining_encryptAESCrypt() {
!     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
      address start = __ pc();
      Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
--- 2390,2400 ----
    // c_rarg2 - K (key) in little endian int array
    // c_rarg3 - r vector byte array address
    // c_rarg4 - input length
    //
    address generate_cipherBlockChaining_encryptAESCrypt() {
!     assert(UseAES, "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
      address start = __ pc();
      Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
*** 2391,2401 ****
      __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
      __ cmpl(rax, 44);
      __ jcc(Assembler::notEqual, L_key_192_256);
      // 128 bit code follows here
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_128);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
--- 2443,2453 ----
      __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
      __ cmpl(rax, 44);
      __ jcc(Assembler::notEqual, L_key_192_256);
      // 128 bit code follows here
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_128);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
*** 2427,2437 ****
      // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
      __ cmpl(rax, 52);
      __ jcc(Assembler::notEqual, L_key_256);
      // 192-bit code follows here (could be changed to use more xmm registers)
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_192);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
--- 2479,2489 ----
      // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
      __ cmpl(rax, 52);
      __ jcc(Assembler::notEqual, L_key_256);
      // 192-bit code follows here (could be changed to use more xmm registers)
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_192);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
*** 2452,2462 ****
      __ jcc(Assembler::notEqual, L_loopTop_192);
      __ jmp(L_exit);
      __ BIND(L_key_256);
      // 256-bit code follows here (could be changed to use more xmm registers)
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_256);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
--- 2504,2514 ----
      __ jcc(Assembler::notEqual, L_loopTop_192);
      __ jmp(L_exit);
      __ BIND(L_key_256);
      // 256-bit code follows here (could be changed to use more xmm registers)
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_loopTop_256);
      __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
      __ pxor (xmm_result, xmm_temp); // xor with the current r vector
*** 2493,2503 ****
    // c_rarg3 - r vector byte array address
    // c_rarg4 - input length
    //
    address generate_cipherBlockChaining_decryptAESCrypt() {
!     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
      address start = __ pc();
      Label L_exit, L_key_192_256, L_key_256;
--- 2545,2555 ----
    // c_rarg3 - r vector byte array address
    // c_rarg4 - input length
    //
    address generate_cipherBlockChaining_decryptAESCrypt() {
!     assert(UseAES, "need AES instructions and misaligned SSE support");
      __ align(CodeEntryAlignment);
      StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
      address start = __ pc();
      Label L_exit, L_key_192_256, L_key_256;
*** 2554,2564 ****
      __ cmpl(rax, 44);
      __ jcc(Assembler::notEqual, L_key_192_256);
      // 128-bit code follows here, parallelized
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_128);
      __ cmpptr(len_reg, 0); // any blocks left??
      __ jcc(Assembler::equal, L_exit);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
--- 2606,2616 ----
      __ cmpl(rax, 44);
      __ jcc(Assembler::notEqual, L_key_192_256);
      // 128-bit code follows here, parallelized
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_128);
      __ cmpptr(len_reg, 0); // any blocks left??
      __ jcc(Assembler::equal, L_exit);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
*** 2595,2605 ****
      // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
      __ cmpl(rax, 52);
      __ jcc(Assembler::notEqual, L_key_256);
      // 192-bit code follows here (could be optimized to use parallelism)
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_192);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
      __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
      for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
--- 2647,2657 ----
      // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
      __ cmpl(rax, 52);
      __ jcc(Assembler::notEqual, L_key_256);
      // 192-bit code follows here (could be optimized to use parallelism)
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_192);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
      __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
      for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
*** 2620,2630 ****
      __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
      __ jmp(L_exit);
      __ BIND(L_key_256);
      // 256-bit code follows here (could be optimized to use parallelism)
!     __ movptr(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_256);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
      __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
      for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
--- 2672,2682 ----
      __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
      __ jmp(L_exit);
      __ BIND(L_key_256);
      // 256-bit code follows here (could be optimized to use parallelism)
!     __ movl(pos, 0);
      __ align(OptoLoopAlignment);
      __ BIND(L_singleBlock_loopTop_256);
      __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
      __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
      for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
src/cpu/x86/vm/stubGenerator_x86_32.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File