diff a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -4299,10 +4299,20 @@
     emit_int8((unsigned char)0xC2);
     emit_int16(imm16);
   }
 }
 
+void Assembler::roll(Register dst, int imm8) { // rotate dst left by imm8 (x86 ROL, register form); presumably the 32-bit variant, as the 'l' suffix suggests
+  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // NOTE(review): the ">> 1" presumably accepts counts up to 63 - confirm that is intended for this rotate
+  int encode = prefix_and_encode(dst->encoding());
+  if (imm8 == 1) { // rotate-by-one form: opcode 0xD1 plus ModRM, no immediate byte
+    emit_int16((unsigned char)0xD1, (0xC0 | encode));
+  } else { // general form: opcode 0xC1 plus ModRM, followed by the count as an immediate byte
+    emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8);
+  }
+}
+
 void Assembler::sahf() {
 #ifdef _LP64
   // Not supported in 64bit mode
   ShouldNotReachHere();
 #endif
diff a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1825,10 +1825,12 @@
 
   void rdtsc();
 
   void ret(int imm16);
 
+  void roll(Register dst, int imm8); // rotate dst left by the immediate count imm8
+
 #ifdef _LP64
   void rorq(Register dst, int imm8);
   void rorxq(Register dst, Register src, int imm8);
   void rorxd(Register dst, Register src, int imm8);
 #endif
diff a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -954,10 +954,13 @@
   void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
                       Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
 
 #endif
 
+  void fast_md5(Register buf, Address state, Address ofs, Address limit, // emits the MD5 block-compression code (macroAssembler_x86_md5.cpp)
+                bool multi_block);
+
   void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
                  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
                  Register buf, Register state, Register ofs, Register limit, Register rsp,
                  bool multi_block);
 
diff a/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
--- /dev/null
+++ 
b/src/hotspot/cpu/x86/macroAssembler_x86_md5.cpp
@@ -0,0 +1,188 @@
+/*
+* Copyright (c) 2020, Microsoft Corporation.
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "macroAssembler_x86.hpp"
+
+// int com.sun.security.provider.MD5.implCompress0(byte[] b, int ofs)
+// Emits MD5 compression of the 64-byte block at buf into the four ints that state points to;
+// with multi_block it repeats while ofs <= limit (unsigned) and returns the updated ofs in rax.
+void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address limit, bool multi_block) {
+
+  Label start, done_hash, loop0;
+
+  bind(start);
+
+  bind(loop0); // per-block loop head: each block must start from the state stored by the previous one
+
+  // Save hash values for addition after rounds
+  movptr(rsi, state);           // rsi is free here; the round macros reuse it as scratch
+  movl(rax, Address(rsi,  0));
+  movl(rbx, Address(rsi,  4));
+  movl(rcx, Address(rsi,  8));
+  movl(rdx, Address(rsi, 12));
+
+#define FF(r1, r2, r3, r4, k, s, t) /* r1 = r2 + rol(r1 + F + X[k] + t, s), F = r4 ^ (r2 & (r3 ^ r4)) */ \
+  movl(rsi, r3);                                   \
+  addl(r1, Address(buf, k*4));                     \
+  xorl(rsi, r4);                                   \
+  andl(rsi, r2);                                   \
+  xorl(rsi, r4);                                   \
+  leal(r1, Address(r1, rsi, Address::times_1, t)); \
+  roll(r1, s);                                     \
+  addl(r1, r2);
+
+#define GG(r1, r2, r3, r4, k, s, t) /* r1 = r2 + rol(r1 + G + X[k] + t, s), G = (r2 & r4) | (r3 & ~r4); also clobbers rdi */ \
+  movl(rsi, r4);                                   \
+  movl(rdi, r4);                                   \
+  addl(r1, Address(buf, k*4));                     \
+  notl(rsi);                                       \
+  andl(rdi, r2);                                   \
+  andl(rsi, r3);                                   \
+  orl(rsi, rdi);                                   \
+  leal(r1, Address(r1, rsi, Address::times_1, t)); \
+  roll(r1, s);                                     \
+  addl(r1, r2);
+
+#define HH(r1, r2, r3, r4, k, s, t) /* r1 = r2 + rol(r1 + H + X[k] + t, s), H = r2 ^ r3 ^ r4 */ \
+  movl(rsi, r3);                                   \
+  addl(r1, Address(buf, k*4));                     \
+  xorl(rsi, r4);                                   \
+  xorl(rsi, r2);                                   \
+  leal(r1, Address(r1, rsi, Address::times_1, t)); \
+  roll(r1, s);                                     \
+  addl(r1, r2);
+
+#define II(r1, r2, r3, r4, k, s, t) /* r1 = r2 + rol(r1 + I + X[k] + t, s), I = r3 ^ (r2 | ~r4) */ \
+  movl(rsi, r4);                                   \
+  notl(rsi);                                       \
+  addl(r1, Address(buf, k*4));                     \
+  orl(rsi, r2);                                    \
+  xorl(rsi, r3);                                   \
+  leal(r1, Address(r1, rsi, Address::times_1, t)); \
+  roll(r1, s);                                     \
+  addl(r1, r2);
+
+  // Round 1
+  FF(rax, rbx, rcx, rdx,  0,  7, 0xd76aa478)
+  FF(rdx, rax, rbx, rcx,  1, 12, 0xe8c7b756)
+  FF(rcx, rdx, rax, rbx,  2, 17, 0x242070db)
+  FF(rbx, rcx, rdx, rax,  3, 22, 0xc1bdceee)
+  FF(rax, rbx, rcx, rdx,  4,  7, 0xf57c0faf)
+  FF(rdx, rax, rbx, rcx,  5, 12, 0x4787c62a)
+  FF(rcx, rdx, rax, rbx,  6, 17, 0xa8304613)
+  FF(rbx, rcx, rdx, rax,  7, 22, 0xfd469501)
+  FF(rax, rbx, rcx, rdx,  8,  7, 0x698098d8)
+  FF(rdx, rax, rbx, rcx,  9, 12, 0x8b44f7af)
+  FF(rcx, rdx, rax, rbx, 10, 17, 0xffff5bb1)
+  FF(rbx, rcx, rdx, rax, 11, 22, 0x895cd7be)
+  FF(rax, rbx, rcx, rdx, 12,  7, 0x6b901122)
+  FF(rdx, rax, rbx, rcx, 13, 12, 0xfd987193)
+  FF(rcx, rdx, rax, rbx, 14, 17, 0xa679438e)
+  FF(rbx, rcx, rdx, rax, 15, 22, 0x49b40821)
+
+  // Round 2
+  GG(rax, rbx, rcx, rdx,  1,  5, 0xf61e2562)
+  GG(rdx, rax, rbx, rcx,  6,  9, 0xc040b340)
+  GG(rcx, rdx, rax, rbx, 11, 14, 0x265e5a51)
+  GG(rbx, rcx, rdx, rax,  0, 20, 0xe9b6c7aa)
+  GG(rax, rbx, rcx, rdx,  5,  5, 0xd62f105d)
+  GG(rdx, rax, rbx, rcx, 10,  9, 0x2441453)
+  GG(rcx, rdx, rax, rbx, 15, 14, 0xd8a1e681)
+  GG(rbx, rcx, rdx, rax,  4, 20, 0xe7d3fbc8)
+  GG(rax, rbx, rcx, rdx,  9,  5, 0x21e1cde6)
+  GG(rdx, rax, rbx, rcx, 14,  9, 0xc33707d6)
+  GG(rcx, rdx, rax, rbx,  3, 14, 0xf4d50d87)
+  GG(rbx, rcx, rdx, rax,  8, 20, 0x455a14ed)
+  GG(rax, rbx, rcx, rdx, 13,  5, 0xa9e3e905)
+  GG(rdx, rax, rbx, rcx,  2,  9, 0xfcefa3f8)
+  GG(rcx, rdx, rax, rbx,  7, 14, 0x676f02d9)
+  GG(rbx, rcx, rdx, rax, 12, 20, 0x8d2a4c8a)
+
+  // Round 3
+  HH(rax, rbx, rcx, rdx,  5,  4, 0xfffa3942)
+  HH(rdx, rax, rbx, rcx,  8, 11, 0x8771f681)
+  HH(rcx, rdx, rax, rbx, 11, 16, 0x6d9d6122)
+  HH(rbx, rcx, rdx, rax, 14, 23, 0xfde5380c)
+  HH(rax, rbx, rcx, rdx,  1,  4, 0xa4beea44)
+  HH(rdx, rax, rbx, rcx,  4, 11, 0x4bdecfa9)
+  HH(rcx, rdx, rax, rbx,  7, 16, 0xf6bb4b60)
+  HH(rbx, rcx, rdx, rax, 10, 23, 0xbebfbc70)
+  HH(rax, rbx, rcx, rdx, 13,  4, 0x289b7ec6)
+  HH(rdx, rax, rbx, rcx,  0, 11, 0xeaa127fa)
+  HH(rcx, rdx, rax, rbx,  3, 16, 0xd4ef3085)
+  HH(rbx, rcx, rdx, rax,  6, 23, 0x4881d05)
+  HH(rax, rbx, rcx, rdx,  9,  4, 0xd9d4d039)
+  HH(rdx, rax, rbx, rcx, 12, 11, 0xe6db99e5)
+  HH(rcx, rdx, rax, rbx, 15, 16, 0x1fa27cf8)
+  HH(rbx, rcx, rdx, rax,  2, 23, 0xc4ac5665)
+
+  // Round 4
+  II(rax, rbx, rcx, rdx,  0,  6, 0xf4292244)
+  II(rdx, rax, rbx, rcx,  7, 10, 0x432aff97)
+  II(rcx, rdx, rax, rbx, 14, 15, 0xab9423a7)
+  II(rbx, rcx, rdx, rax,  5, 21, 0xfc93a039)
+  II(rax, rbx, rcx, rdx, 12,  6, 0x655b59c3)
+  II(rdx, rax, rbx, rcx,  3, 10, 0x8f0ccc92)
+  II(rcx, rdx, rax, rbx, 10, 15, 0xffeff47d)
+  II(rbx, rcx, rdx, rax,  1, 21, 0x85845dd1)
+  II(rax, rbx, rcx, rdx,  8,  6, 0x6fa87e4f)
+  II(rdx, rax, rbx, rcx, 15, 10, 0xfe2ce6e0)
+  II(rcx, rdx, rax, rbx,  6, 15, 0xa3014314)
+  II(rbx, rcx, rdx, rax, 13, 21, 0x4e0811a1)
+  II(rax, rbx, rcx, rdx,  4,  6, 0xf7537e82)
+  II(rdx, rax, rbx, rcx, 11, 10, 0xbd3af235)
+  II(rcx, rdx, rax, rbx,  2, 15, 0x2ad7d2bb)
+  II(rbx, rcx, rdx, rax,  9, 21, 0xeb86d391)
+
+#undef FF
+#undef GG
+#undef HH
+#undef II
+
+  // write hash values back in the correct order; this has to happen once per
+  // block (before looping) so that loop0 reloads the chained state for the next block
+  movptr(rdi, state);
+  addl(Address(rdi,  0), rax);
+  addl(Address(rdi,  4), rbx);
+  addl(Address(rdi,  8), rcx);
+  addl(Address(rdi, 12), rdx);
+
+  if (multi_block) {
+    // increment data pointer and loop if more to process
+    addptr(buf, 64);
+    movl(rsi, ofs);
+    addl(rsi, 64);
+    movl(ofs, rsi);
+    cmpl(rsi, limit);
+    jcc(Assembler::belowEqual, loop0);
+    movptr(rax, rsi); //return ofs (taken from rsi; the ofs slot is only written as a 32-bit value)
+  }
+
+  bind(done_hash);
+}
diff a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
@@ -2914,10 +2914,50 @@
     __ jmp(L_multiBlock_loopTop[2]); //key192
 
     return start;
   }
 
+  // ofs and limit are used for multi-block byte array.
+  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
+  address generate_md5_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    const Register buf_param = rbp; // rbp is reused as the buffer pointer once the arguments have been read
+    const Address state_param(rsp, 0 * wordSize);
+    const Address ofs_param  (rsp, 1 * wordSize);
+    const Address limit_param(rsp, 2 * wordSize);
+
+    __ enter();
+    __ push(rbx);
+    __ push(rdi);
+    __ push(rsi);
+    __ push(rbp); // saved because buf_param overwrites rbp below; restored by the matching pop
+    __ subptr(rsp, 3 * wordSize); // three scratch slots: state_param, ofs_param, limit_param
+
+    __ movptr(rsi, Address(rbp, 8 + 4)); // copy the incoming state argument into its rsp-relative slot
+    __ movptr(state_param, rsi);
+    if (multi_block) { // ofs and limit are only copied for the multi-block stub
+      __ movptr(rsi, Address(rbp, 8 + 8));
+      __ movptr(ofs_param, rsi);
+      __ movptr(rsi, Address(rbp, 8 + 12));
+      __ movptr(limit_param, rsi);
+    }
+    __ movptr(buf_param, Address(rbp, 8 + 0)); // load buf last: this overwrites rbp, which the argument loads above still address through
+    __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
+
+    __ addptr(rsp, 3 * wordSize); // drop the scratch slots, then restore registers in reverse push order
+    __ pop(rbp);
+    __ pop(rsi);
+    __ pop(rdi);
+    __ pop(rbx);
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   address generate_upper_word_mask() {
     __ align(64);
     StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
     address start = __ pc();
     __ emit_data(0x00000000, relocInfo::none, 0);
@@ -3919,10 +3959,14 @@
     if (UseAESCTRIntrinsics) {
       StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
     }
 
+    if (UseMD5Intrinsics) { // one stub per entry point: single-block and multi-block
+      StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
+      StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
+    }
     if (UseSHA1Intrinsics) {
       StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
       StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
       StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
       StubRoutines::_sha1_implCompressMB =
generate_sha1_implCompress(true, "sha1_implCompressMB");
diff a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -3808,10 +3808,47 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
     return start;
   }
 
+  // ofs and limit are used for multi-block byte array.
+  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
+  address generate_md5_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    const Register buf_param = r15;
+    const Address state_param(rsp, 0 * wordSize);
+    const Address ofs_param  (rsp, 1 * wordSize    );
+    const Address limit_param(rsp, 1 * wordSize + 4); // limit sits 4 bytes after ofs; both are stored with movl below
+
+    __ enter();
+    __ push(rbx);
+    __ push(rdi);
+    __ push(rsi);
+    __ push(r15); // r15 carries buf_param for the duration of the stub
+    __ subptr(rsp, 2 * wordSize); // scratch slots for state_param and the ofs/limit pair
+
+    __ movptr(buf_param, c_rarg0); // fast_md5 uses rax/rbx/rcx/rdx/rsi/rdi as scratch, so the arguments are moved out of the c_rarg registers first
+    __ movptr(state_param, c_rarg1);
+    if (multi_block) { // ofs and limit are only stored for the multi-block stub
+      __ movl(ofs_param, c_rarg2);
+      __ movl(limit_param, c_rarg3);
+    }
+    __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
+
+    __ addptr(rsp, 2 * wordSize); // drop the scratch slots, then restore registers in reverse push order
+    __ pop(r15);
+    __ pop(rsi);
+    __ pop(rdi);
+    __ pop(rbx);
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   address generate_upper_word_mask() {
     __ align(64);
     StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
     address start = __ pc();
     __ emit_data64(0x0000000000000000, relocInfo::none);
@@ -6496,10 +6533,14 @@
         StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
         StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
       }
     }
 
+    if (UseMD5Intrinsics) { // one stub per entry point: single-block and multi-block
+      StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
+      StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
+    }
     if (UseSHA1Intrinsics) {
StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
       StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
       StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
       StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
diff a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1594,10 +1594,14 @@
   // This machine allows unaligned memory accesses
   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
   }
 
+  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { // no CPU-feature test in this hunk: enabled unless the flag was set explicitly
+    UseMD5Intrinsics = true; // NOTE(review): the block above uses FLAG_SET_DEFAULT for the same purpose - consider matching it
+  }
+
 #ifndef PRODUCT
   if (log_is_enabled(Info, os, cpu)) {
     LogStream ls(Log(os, cpu)::info());
     outputStream* log = &ls;
     log->print_cr("Logical CPUs per core: %u",
diff a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp
--- a/src/hotspot/share/classfile/vmSymbols.cpp
+++ b/src/hotspot/share/classfile/vmSymbols.cpp
@@ -453,11 +453,11 @@
   case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
   case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
   case vmIntrinsics::_counterMode_AESCrypt:
     return 1;
   case vmIntrinsics::_digestBase_implCompressMB:
-    return 3;
+    return 4; // one predicate per digest class handled by the multi-block intrinsic: MD5, SHA, SHA2, SHA5
   default:
     return 0;
   }
 }
 
@@ -697,21 +697,24 @@
     if (!UseAESIntrinsics) return true;
     break;
   case vmIntrinsics::_counterMode_AESCrypt:
     if (!UseAESCTRIntrinsics) return true;
     break;
+  case vmIntrinsics::_md5_implCompress: // single-block MD5 is disabled unless UseMD5Intrinsics is on
+    if (!UseMD5Intrinsics) return true;
+    break;
   case vmIntrinsics::_sha_implCompress:
     if (!UseSHA1Intrinsics) return true;
     break;
   case vmIntrinsics::_sha2_implCompress:
     if (!UseSHA256Intrinsics) return true;
     break;
   case vmIntrinsics::_sha5_implCompress:
     if (!UseSHA512Intrinsics) return true;
     break;
   case vmIntrinsics::_digestBase_implCompressMB:
-    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics))
return true;
+    if (!(UseMD5Intrinsics || UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return true; // multi-block stays enabled while any one digest flag is on
     break;
   case vmIntrinsics::_ghash_processBlocks:
     if (!UseGHASHIntrinsics) return true;
     break;
   case vmIntrinsics::_base64_encodeBlock:
diff a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -1042,15 +1042,19 @@
                                                                                                                         \
   do_class(com_sun_crypto_provider_counterMode,      "com/sun/crypto/provider/CounterMode")                             \
    do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
    do_name(     crypt_name,                                 "implCrypt")                                                \
                                                                                                                         \
+  /* support for sun.security.provider.MD5; implCompress_name/_signature are declared here and reused by the SHA entries below */ \
+  do_class(sun_security_provider_md5,                              "sun/security/provider/MD5")                         \
+  do_intrinsic(_md5_implCompress, sun_security_provider_md5, implCompress_name, implCompress_signature, F_R)            \
+   do_name(     implCompress_name,                                 "implCompress0")                                     \
+   do_signature(implCompress_signature,                            "([BI)V")                                            \
+                                                                                                                        \
   /* support for sun.security.provider.SHA */                                                                           \
   do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
   do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
-   do_name(     implCompress_name,                                 "implCompress0")                                     \
-   do_signature(implCompress_signature,                            "([BI)V")                                            \
                                                                                                                         \
   /* support for sun.security.provider.SHA2 */                                                                          \
   do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
   do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
                                                                                                                         \
diff a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@@ -618,10 +618,11 @@
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
   case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
   case
vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
   case vmIntrinsics::_counterMode_AESCrypt:
+  case vmIntrinsics::_md5_implCompress: // listed alongside the SHA single-block intrinsics
   case vmIntrinsics::_sha_implCompress:
   case vmIntrinsics::_sha2_implCompress:
   case vmIntrinsics::_sha5_implCompress:
   case vmIntrinsics::_digestBase_implCompressMB:
   case vmIntrinsics::_multiplyToLen:
diff a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@@ -303,17 +303,17 @@
   Node* inline_counterMode_AESCrypt_predicate();
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
   bool inline_ghash_processBlocks();
   bool inline_base64_encodeBlock();
-  bool inline_sha_implCompress(vmIntrinsics::ID id);
+  bool inline_digestBase_implCompress(vmIntrinsics::ID id); // renamed from inline_sha_implCompress: now also serves MD5
   bool inline_digestBase_implCompressMB(int predicate);
-  bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
-                                 bool long_state, address stubAddr, const char *stubName,
-                                 Node* src_start, Node* ofs, Node* limit);
-  Node* get_state_from_sha_object(Node *sha_object);
-  Node* get_state_from_sha5_object(Node *sha_object);
+  bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
+                                        bool long_state, address stubAddr, const char *stubName,
+                                        Node* src_start, Node* ofs, Node* limit);
+  Node* get_state_from_digestBase_object(Node *digestBase_object); // loads the int[] ("[I") state field
+  Node* get_long_state_from_digestBase_object(Node *digestBase_object); // loads the long[] ("[J") state field
   Node* inline_digestBase_implCompressMB_predicate(int predicate);
   bool inline_encodeISOArray();
   bool inline_updateCRC32();
   bool inline_updateBytesCRC32();
   bool inline_updateByteBufferCRC32();
@@ -824,14 +824,15 @@
     return inline_electronicCodeBook_AESCrypt(intrinsic_id());
 
   case vmIntrinsics::_counterMode_AESCrypt:
     return inline_counterMode_AESCrypt(intrinsic_id());
 
+  case vmIntrinsics::_md5_implCompress: // falls through to the shared single-block digest path
   case vmIntrinsics::_sha_implCompress:
   case vmIntrinsics::_sha2_implCompress:
   case vmIntrinsics::_sha5_implCompress:
-    return inline_sha_implCompress(intrinsic_id());
+    return inline_digestBase_implCompress(intrinsic_id());
 
   case vmIntrinsics::_digestBase_implCompressMB:
     return inline_digestBase_implCompressMB(predicate);
 
   case vmIntrinsics::_multiplyToLen:
@@ -6410,25 +6411,28 @@
                                  stubAddr, stubName, TypePtr::BOTTOM,
                                  src_start, offset, len, dest_start, dp, isURL);
   return true;
 }
 
-//------------------------------inline_sha_implCompress-----------------------
+//------------------------------inline_digestBase_implCompress-----------------------
+//
+// Calculate MD5 for single-block byte[] array.
+// void com.sun.security.provider.MD5.implCompress(byte[] buf, int ofs)
 //
 // Calculate SHA (i.e., SHA-1) for single-block byte[] array.
 // void com.sun.security.provider.SHA.implCompress(byte[] buf, int ofs)
 //
 // Calculate SHA2 (i.e., SHA-244 or SHA-256) for single-block byte[] array.
 // void com.sun.security.provider.SHA2.implCompress(byte[] buf, int ofs)
 //
 // Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array.
 // void com.sun.security.provider.SHA5.implCompress(byte[] buf, int ofs)
 //
-bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) {
+bool LibraryCallKit::inline_digestBase_implCompress(vmIntrinsics::ID id) {
   assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters");
 
-  Node* sha_obj = argument(0);
+  Node* obj = argument(0); // receiver: the digest object whose state field is loaded below
   Node* src = argument(1); // type oop
   Node* ofs = argument(2); // type int
 
   const Type* src_type = src->Value(&_gvn);
   const TypeAryPtr* top_src = src_type->isa_aryptr();
@@ -6447,25 +6451,31 @@
   Node* state = NULL;
   address stubAddr;
   const char *stubName;
 
   switch(id) {
+  case vmIntrinsics::_md5_implCompress:
+    assert(UseMD5Intrinsics, "need MD5 instruction support");
+    state = get_state_from_digestBase_object(obj); // MD5 uses the same int[] state accessor as SHA and SHA2
+    stubAddr = StubRoutines::md5_implCompress();
+    stubName = "md5_implCompress";
+    break;
   case vmIntrinsics::_sha_implCompress:
     assert(UseSHA1Intrinsics, "need SHA1 instruction support");
-    state = get_state_from_sha_object(sha_obj);
+    state = get_state_from_digestBase_object(obj);
     stubAddr = StubRoutines::sha1_implCompress();
     stubName = "sha1_implCompress";
     break;
   case vmIntrinsics::_sha2_implCompress:
     assert(UseSHA256Intrinsics, "need SHA256 instruction support");
-    state = get_state_from_sha_object(sha_obj);
+    state = get_state_from_digestBase_object(obj);
     stubAddr = StubRoutines::sha256_implCompress();
     stubName = "sha256_implCompress";
     break;
   case vmIntrinsics::_sha5_implCompress:
     assert(UseSHA512Intrinsics, "need SHA512 instruction support");
-    state = get_state_from_sha5_object(sha_obj);
+    state = get_long_state_from_digestBase_object(obj); // SHA5 is the only case using the long[] state accessor
     stubAddr = StubRoutines::sha512_implCompress();
     stubName = "sha512_implCompress";
     break;
   default:
     fatal_unexpected_iid(id);
@@ -6475,26 +6485,26 @@
 
   assert(stubAddr != NULL, "Stub is generated");
   if (stubAddr == NULL) return false;
 
   // Call the stub.
-  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(),
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::digestBase_implCompress_Type(), // NOTE(review): the OptoRuntime rename is not visible in this excerpt - confirm it is part of the change
                                  stubAddr, stubName, TypePtr::BOTTOM,
                                  src_start, state);
 
   return true;
 }
 
 //------------------------------inline_digestBase_implCompressMB-----------------------
 //
-// Calculate SHA/SHA2/SHA5 for multi-block byte[] array.
+// Calculate MD5/SHA/SHA2/SHA5 for multi-block byte[] array.
 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
 //
 bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
-  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
-         "need SHA1/SHA256/SHA512 instruction support");
-  assert((uint)predicate < 3, "sanity");
+  assert(UseMD5Intrinsics || UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
+         "need MD5/SHA1/SHA256/SHA512 instruction support");
+  assert((uint)predicate < 4, "sanity"); // four predicates: 0 = MD5, 1 = SHA, 2 = SHA2, 3 = SHA5
   assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters");
 
   Node* digestBase_obj = argument(0); // The receiver was checked for NULL already.
   Node* src = argument(1); // byte[] array
   Node* ofs = argument(2); // type int
@@ -6513,71 +6523,79 @@
   }
   // 'src_start' points to src array + offset
   src = must_be_not_null(src, false);
   Node* src_start = array_element_address(src, ofs, src_elem);
 
-  const char* klass_SHA_name = NULL;
+  const char* klass_name = NULL; // stays NULL when the selected digest's intrinsic is unavailable
   const char* stub_name = NULL;
   address stub_addr = NULL;
   bool long_state = false;
 
   switch (predicate) {
   case 0:
+    if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_md5_implCompress)) { // predicate 0 now selects MD5; the SHA cases shift up by one
+      klass_name = "sun/security/provider/MD5";
+      stub_name = "md5_implCompressMB";
+      stub_addr = StubRoutines::md5_implCompressMB();
+    }
+    break;
+  case 1:
     if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_sha_implCompress)) {
-      klass_SHA_name = "sun/security/provider/SHA";
+      klass_name = "sun/security/provider/SHA";
       stub_name = "sha1_implCompressMB";
       stub_addr = StubRoutines::sha1_implCompressMB();
     }
     break;
-  case 1:
+  case 2:
     if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_sha2_implCompress)) {
-      klass_SHA_name = "sun/security/provider/SHA2";
+      klass_name = "sun/security/provider/SHA2";
       stub_name = "sha256_implCompressMB";
       stub_addr = StubRoutines::sha256_implCompressMB();
     }
     break;
-  case 2:
+  case 3:
     if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_sha5_implCompress)) {
-      klass_SHA_name = "sun/security/provider/SHA5";
+      klass_name = "sun/security/provider/SHA5";
       stub_name = "sha512_implCompressMB";
       stub_addr = StubRoutines::sha512_implCompressMB();
       long_state = true;
     }
     break;
   default:
-    fatal("unknown SHA intrinsic predicate: %d", predicate);
+    fatal("unknown DigestBase intrinsic predicate: %d", predicate);
   }
-  if (klass_SHA_name != NULL) {
+  if (klass_name != NULL) {
     assert(stub_addr != NULL, "Stub is generated");
     if (stub_addr == NULL) return false;
 
     // get DigestBase klass to lookup for SHA klass
     const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr();
     assert(tinst != NULL, "digestBase_obj is not instance???");
assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
 
-    ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
-    assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded");
-    ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
-    return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit);
+    ciKlass* klass = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_name)); // resolve the concrete digest class selected by the predicate
+    assert(klass->is_loaded(), "predicate checks that this class is loaded");
+    ciInstanceKlass* instklass = klass->as_instance_klass();
+    return inline_digestBase_implCompressMB(digestBase_obj, instklass, long_state, stub_addr, stub_name, src_start, ofs, limit);
   }
   return false;
 }
-//------------------------------inline_sha_implCompressMB-----------------------
-bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA,
-                                               bool long_state, address stubAddr, const char *stubName,
-                                               Node* src_start, Node* ofs, Node* limit) {
-  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA);
+
+//------------------------------inline_digestBase_implCompressMB-----------------------
+bool LibraryCallKit::inline_digestBase_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass, // emits the multi-block stub call for the digest class instklass
+                                                      bool long_state, address stubAddr, const char *stubName, // long_state: state field is long[] (set only for SHA5 by the caller)
+                                                      Node* src_start, Node* ofs, Node* limit) {
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass);
   const TypeOopPtr* xtype = aklass->as_instance_type();
-  Node* sha_obj = new CheckCastPPNode(control(), digestBase_obj, xtype);
-  sha_obj = _gvn.transform(sha_obj);
+  Node* obj = new CheckCastPPNode(control(), digestBase_obj, xtype); // narrow the DigestBase receiver to the concrete digest type
+  obj = _gvn.transform(obj);
 
   Node* state;
   if (long_state) {
-    state = get_state_from_sha5_object(sha_obj);
+    state = get_long_state_from_digestBase_object(obj); // long[] ("[J") state; load through the narrowed obj, as the removed code did with sha_obj
   } else {
-    state = get_state_from_sha_object(sha_obj);
+    state = get_state_from_digestBase_object(obj); // int[] ("[I") state for MD5/SHA/SHA2
   }
   if (state == NULL) return false;
 
   // Call the stub.
   Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
@@ -6589,24 +6607,24 @@
   set_result(result);
 
   return true;
 }
 
-//------------------------------get_state_from_sha_object-----------------------
-Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) {
-  Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false);
-  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2");
+//------------------------------get_state_from_digestBase_object-----------------------
+Node * LibraryCallKit::get_state_from_digestBase_object(Node *digestBase_object) { // returns the address of element 0 of the int[] "state" field, or NULL if the field load fails
+  Node* sha_state = load_field_from_object(digestBase_object, "state", "[I", /*is_exact*/ false);
+  assert (sha_state != NULL, "wrong version of sun.security.provider.MD5/SHA/SHA2");
   if (sha_state == NULL) return (Node *) NULL;
 
   // now have the array, need to get the start address of the state array
   Node* state = array_element_address(sha_state, intcon(0), T_INT);
   return state;
 }
 
-//------------------------------get_state_from_sha5_object-----------------------
-Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) {
-  Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false);
+//------------------------------get_long_state_from_digestBase_object-----------------------
+Node * LibraryCallKit::get_long_state_from_digestBase_object(Node *digestBase_object) { // same as above for the long[] "state" field
+  Node* sha_state = load_field_from_object(digestBase_object, "state", "[J", /*is_exact*/ false);
   assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5");
   if (sha_state == NULL) return (Node *) NULL;
 
   // now have the array, need to get the start address of the state array
   Node* state = array_element_address(sha_state, intcon(0), T_LONG);
@@ -6614,63 +6632,69 @@
 }
//----------------------------inline_digestBase_implCompressMB_predicate---------------------------- // Return node representing slow path of predicate check. // the pseudo code we want to emulate with this predicate is: -// if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath +// if (digestBaseObj instanceof MD5/SHA/SHA2/SHA5) do_intrinsic, else do_javapath // Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) { - assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics, - "need SHA1/SHA256/SHA512 instruction support"); - assert((uint)predicate < 3, "sanity"); + assert(UseMD5Intrinsics || UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics, + "need MD5/SHA1/SHA256/SHA512 instruction support"); + assert((uint)predicate < 4, "sanity"); // The receiver was checked for NULL already. Node* digestBaseObj = argument(0); // get DigestBase klass for instanceOf check const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr(); assert(tinst != NULL, "digestBaseObj is null"); assert(tinst->klass()->is_loaded(), "DigestBase is not loaded"); - const char* klass_SHA_name = NULL; + const char* klass_name = NULL; switch (predicate) { case 0: + if (UseMD5Intrinsics) { + // we want to do an instanceof comparison against the MD5 class + klass_name = "sun/security/provider/MD5"; + } + break; + case 1: if (UseSHA1Intrinsics) { // we want to do an instanceof comparison against the SHA class - klass_SHA_name = "sun/security/provider/SHA"; + klass_name = "sun/security/provider/SHA"; } break; - case 1: + case 2: if (UseSHA256Intrinsics) { // we want to do an instanceof comparison against the SHA2 class - klass_SHA_name = "sun/security/provider/SHA2"; + klass_name = "sun/security/provider/SHA2"; } break; - case 2: + case 3: if (UseSHA512Intrinsics) { // we want to do an instanceof comparison against the SHA5 class - klass_SHA_name = "sun/security/provider/SHA5"; + klass_name = "sun/security/provider/SHA5"; } 
break; default: fatal("unknown SHA intrinsic predicate: %d", predicate); } - ciKlass* klass_SHA = NULL; - if (klass_SHA_name != NULL) { - klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name)); + ciKlass* klass = NULL; + if (klass_name != NULL) { + klass = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_name)); } - if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) { - // if none of SHA/SHA2/SHA5 is loaded, we never take the intrinsic fast path + if ((klass == NULL) || !klass->is_loaded()) { + // if none of MD5/SHA/SHA2/SHA5 is loaded, we never take the intrinsic fast path Node* ctrl = control(); set_control(top()); // no intrinsic path return ctrl; } - ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass(); + ciInstanceKlass* instklass = klass->as_instance_klass(); - Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA))); - Node* cmp_instof = _gvn.transform(new CmpINode(instofSHA, intcon(1))); + Node* instof = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass))); + Node* cmp_instof = _gvn.transform(new CmpINode(instof, intcon(1))); Node* bool_instof = _gvn.transform(new BoolNode(cmp_instof, BoolTest::ne)); Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN); return instof_false; // even if it is NULL } diff a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -958,11 +958,11 @@ } /* * void implCompress(byte[] buf, int ofs) */ -const TypeFunc* OptoRuntime::sha_implCompress_Type() { +const TypeFunc* OptoRuntime::digestBase_implCompress_Type() { // create input type (domain) int num_args = 2; int argcnt = num_args; const Type** fields = TypeTuple::fields(argcnt); int argp = TypeFunc::Parms; diff a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp --- a/src/hotspot/share/opto/runtime.hpp +++ 
b/src/hotspot/share/opto/runtime.hpp @@ -276,11 +276,11 @@ static const TypeFunc* aescrypt_block_Type(); static const TypeFunc* cipherBlockChaining_aescrypt_Type(); static const TypeFunc* electronicCodeBook_aescrypt_Type(); static const TypeFunc* counterMode_aescrypt_Type(); - static const TypeFunc* sha_implCompress_Type(); + static const TypeFunc* digestBase_implCompress_Type(); static const TypeFunc* digestBase_implCompressMB_Type(); static const TypeFunc* multiplyToLen_Type(); static const TypeFunc* montgomeryMultiply_Type(); static const TypeFunc* montgomerySquare_Type(); diff a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -321,10 +321,13 @@ "Use intrinsics for AES versions of crypto") \ \ diagnostic(bool, UseAESCTRIntrinsics, false, \ "Use intrinsics for the paralleled version of AES/CTR crypto") \ \ + diagnostic(bool, UseMD5Intrinsics, false, \ + "Use intrinsics for MD5 crypto hash function") \ + \ diagnostic(bool, UseSHA1Intrinsics, false, \ "Use intrinsics for SHA-1 crypto hash function. 
" \ "Requires that UseSHA is enabled.") \ \ diagnostic(bool, UseSHA256Intrinsics, false, \ diff a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -136,10 +136,12 @@ address StubRoutines::_electronicCodeBook_decryptAESCrypt = NULL; address StubRoutines::_counterMode_AESCrypt = NULL; address StubRoutines::_ghash_processBlocks = NULL; address StubRoutines::_base64_encodeBlock = NULL; +address StubRoutines::_md5_implCompress = NULL; +address StubRoutines::_md5_implCompressMB = NULL; address StubRoutines::_sha1_implCompress = NULL; address StubRoutines::_sha1_implCompressMB = NULL; address StubRoutines::_sha256_implCompress = NULL; address StubRoutines::_sha256_implCompressMB = NULL; address StubRoutines::_sha512_implCompress = NULL; diff a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -218,10 +218,12 @@ static address _electronicCodeBook_decryptAESCrypt; static address _counterMode_AESCrypt; static address _ghash_processBlocks; static address _base64_encodeBlock; + static address _md5_implCompress; + static address _md5_implCompressMB; static address _sha1_implCompress; static address _sha1_implCompressMB; static address _sha256_implCompress; static address _sha256_implCompressMB; static address _sha512_implCompress; @@ -395,10 +397,12 @@ static address electronicCodeBook_encryptAESCrypt() { return _electronicCodeBook_encryptAESCrypt; } static address electronicCodeBook_decryptAESCrypt() { return _electronicCodeBook_decryptAESCrypt; } static address counterMode_AESCrypt() { return _counterMode_AESCrypt; } static address ghash_processBlocks() { return _ghash_processBlocks; } static address base64_encodeBlock() { return _base64_encodeBlock; } + static address md5_implCompress() { return 
_md5_implCompress; } + static address md5_implCompressMB() { return _md5_implCompressMB; } static address sha1_implCompress() { return _sha1_implCompress; } static address sha1_implCompressMB() { return _sha1_implCompressMB; } static address sha256_implCompress() { return _sha256_implCompress; } static address sha256_implCompressMB() { return _sha256_implCompressMB; } static address sha512_implCompress() { return _sha512_implCompress; } diff a/src/java.base/share/classes/sun/security/provider/MD5.java b/src/java.base/share/classes/sun/security/provider/MD5.java --- a/src/java.base/share/classes/sun/security/provider/MD5.java +++ b/src/java.base/share/classes/sun/security/provider/MD5.java @@ -24,12 +24,14 @@ */ package sun.security.provider; import java.util.Arrays; +import java.util.Objects; import static sun.security.provider.ByteArrayAccess.*; +import jdk.internal.HotSpotIntrinsicCandidate; /** * The MD5 class is used to compute an MD5 message digest over a given * buffer of bytes. It is an implementation of the RSA Data Security Inc * MD5 algorithim as described in internet RFC 1321. @@ -145,12 +147,31 @@ * This is where the functions come together as the generic MD5 * transformation operation. It consumes sixteen * bytes from the buffer, beginning at the specified offset. */ void implCompress(byte[] buf, int ofs) { + implCompressCheck(buf, ofs); + implCompress0(buf, ofs); + } + + private void implCompressCheck(byte[] buf, int ofs) { + Objects.requireNonNull(buf); + + // The checks performed by the method 'b2iLittle64' + // are sufficient for the case when the method + // 'implCompress0' is replaced with a compiler + // intrinsic. b2iLittle64(buf, ofs, x); + } + // The method 'implCompress0' seems not to use its parameters.
+ // The method can, however, be replaced with a compiler intrinsic + // that operates directly on the array 'buf' (starting from + // offset 'ofs') and not on array 'x', therefore 'buf' and 'ofs' + // must be passed as parameters to the method. + @HotSpotIntrinsicCandidate + void implCompress0(byte[] buf, int ofs) { int a = state[0]; int b = state[1]; int c = state[2]; int d = state[3]; diff a/src/java.base/share/classes/sun/security/provider/SHA.java b/src/java.base/share/classes/sun/security/provider/SHA.java --- a/src/java.base/share/classes/sun/security/provider/SHA.java +++ b/src/java.base/share/classes/sun/security/provider/SHA.java @@ -132,16 +132,16 @@ private void implCompressCheck(byte[] buf, int ofs) { Objects.requireNonNull(buf); // The checks performed by the method 'b2iBig64' // are sufficient for the case when the method - // 'implCompressImpl' is replaced with a compiler + // 'implCompress0' is replaced with a compiler // intrinsic. b2iBig64(buf, ofs, W); } - // The method 'implCompressImpl seems not to use its parameters. + // The method 'implCompress0' seems not to use its parameters. // The method can, however, be replaced with a compiler intrinsic // that operates directly on the array 'buf' (starting from // offset 'ofs') and not on array 'W', therefore 'buf' and 'ofs' // must be passed as parameter to the method. @HotSpotIntrinsicCandidate