< prev index next >

src/cpu/sparc/vm/stubGenerator_sparc.cpp

Print this page
rev 9055 : 8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
Reviewed-by: kvn, jrose

*** 4786,4796 **** __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); return start; } ! void generate_initial() { // Generates all stubs and initializes the entry points //------------------------------------------------------------------------------------------------------------------------ // entry points that exist in all platforms // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than --- 4786,4920 ---- __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); return start; } ! /* Single and multi-block ghash operations. Emits the "ghash_processBlocks" stub and returns its entry address. The stub takes its arguments in I0 (state), I1 (subkeyH), I2 (data) and I3 (len); each loop iteration consumes 16 bytes of 'data' and decrements 'len', and on exit the two 64-bit state words are stored back to 'state' at offsets 0 and 8. NOTE(review): 'len' is tested only after the first block has been processed, so this presumably relies on callers passing len >= 1 -- confirm. */ ! address generate_ghash_processBlocks() { ! __ align(CodeEntryAlignment); ! Label L_ghash_loop, L_aligned, L_main; ! StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); ! address start = __ pc(); ! ! Register state = I0; ! Register subkeyH = I1; ! Register data = I2; ! Register len = I3; ! ! __ save_frame(0); ! ! /* Load the two 64-bit words of 'state' into O0 and O1 */ __ ldx(state, 0, O0); ! __ ldx(state, 8, O1); ! ! // Loop label for multiblock operations ! __ BIND(L_ghash_loop); ! ! // Check if 'data' is unaligned ! __ andcc(data, 7, G1); ! __ br(Assembler::zero, false, Assembler::pt, L_aligned); ! __ delayed()->nop(); ! ! Register left_shift = L1; ! Register right_shift = L2; ! Register data_ptr = L3; ! ! // Get left and right shift values in bits ! __ sll(G1, LogBitsPerByte, left_shift); ! __ mov(64, right_shift); ! __ sub(right_shift, left_shift, right_shift); ! ! // Align to read 'data' ! __ sub(data, G1, data_ptr); ! ! // Load first 8 bytes of 'data' ! __ ldx(data_ptr, 0, O4); ! __ sllx(O4, left_shift, O4); ! __ ldx(data_ptr, 8, O5); ! __ srlx(O5, right_shift, G4); ! __ bset(G4, O4); ! ! // Load second 8 bytes of 'data' ! __ sllx(O5, left_shift, O5); ! __ ldx(data_ptr, 16, G4); ! __ srlx(G4, right_shift, G4); ! __ ba(L_main); ! __ delayed()->bset(G4, O5); ! ! // If 'data' is aligned, load normally ! __ BIND(L_aligned); ! __ ldx(data, 0, O4); ! __ ldx(data, 8, O5); ! ! /* Both paths join here with the 16-byte data block in O4 and O5 */ __ BIND(L_main); ! 
__ ldx(subkeyH, 0, O2); ! __ ldx(subkeyH, 8, O3); /* O2 and O3 now hold the two 64-bit words of subkeyH */ ! ! __ xor3(O0, O4, O0); ! __ xor3(O1, O5, O1); /* xor the loaded data block (O4, O5) into the state words (O0, O1) */ ! ! __ xmulxhi(O0, O3, G3); ! __ xmulx(O0, O2, O5); ! __ xmulxhi(O1, O2, G4); ! __ xmulxhi(O1, O3, G5); ! __ xmulx(O0, O3, G1); ! __ xmulx(O1, O3, G2); ! __ xmulx(O1, O2, O3); ! __ xmulxhi(O0, O2, O4); ! ! __ mov(0xE1, O0); ! __ sllx(O0, 56, O0); /* O0 = 0xE1 << 56; presumably the GHASH reduction constant -- confirm */ ! ! __ xor3(O5, G3, O5); ! __ xor3(O5, G4, O5); ! __ xor3(G5, G1, G1); ! __ xor3(G1, O3, G1); ! __ srlx(G2, 63, O1); ! __ srlx(G1, 63, G3); ! __ sllx(G2, 63, O3); ! __ sllx(G2, 58, O2); ! __ xor3(O3, O2, O2); ! ! __ sllx(G1, 1, G1); ! __ or3(G1, O1, G1); ! ! __ xor3(G1, O2, G1); ! ! __ sllx(G2, 1, G2); ! ! __ xmulxhi(G1, O0, O1); ! __ xmulx(G1, O0, O2); ! __ xmulxhi(G2, O0, O3); ! __ xmulx(G2, O0, G1); ! ! __ xor3(O4, O1, O4); ! __ xor3(O5, O2, O5); ! __ xor3(O5, O3, O5); ! ! __ sllx(O4, 1, O2); ! __ srlx(O5, 63, O3); ! ! __ or3(O2, O3, O0); ! ! __ sllx(O5, 1, O1); ! __ srlx(G1, 63, O2); ! __ or3(O1, O2, O1); ! __ xor3(O1, G3, O1); ! ! /* One block done: decrement 'len' and loop while it is non-zero; the delay slot advances 'data' by 16 bytes */ __ deccc(len); ! __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop); ! __ delayed()->add(data, 16, data); ! ! __ stx(O0, I0, 0); ! __ stx(O1, I0, 8); /* write the updated state words back through I0, the register aliased as 'state' */ ! ! __ ret(); ! __ delayed()->restore(); ! ! return start; ! } ! ! void generate_initial() { // Generates all stubs and initializes the entry points //------------------------------------------------------------------------------------------------------------------------ // entry points that exist in all platforms // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
*** 4859,4868 **** --- 4983,4997 ---- StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + // generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // generate SHA1/SHA256/SHA512 intrinsics code if (UseSHA1Intrinsics) { StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); }
< prev index next >