< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page
rev 54140 : 8255625: AArch64: Implement Base64.encodeBlock accelerator/intrinsic
Reviewed-by: aph

*** 4728,4737 **** --- 4728,4881 ---- __ ret(lr); return start; } + /** + * Emits one SIMD encoding round: loads 3 * size source bytes with ld3 + * (post-incrementing src), derives four 6-bit table indices per 3-byte + * group, translates them with tbl through the 4-register table that + * starts at codec, and stores 4 * size output bytes with st4 + * (post-incrementing dst). + * + * Index derivation (byte-lane shifts): ind0 is the top 6 bits of in0, + * ind1 joins the low 2 bits of in0 with the top 4 bits of in1, + * ind2 joins the low 4 bits of in1 with the top 2 bits of in2, + * ind3 is the low 6 bits of in2. + * + * size is 16 for the T16B arrangement; any other value selects T8B. + * Uses v4-v6 for input, v16-v19 for output and v20-v23 for indices. + */ + void generate_base64_encode_simdround(Register src, Register dst, + FloatRegister codec, u8 size) { + + FloatRegister in0 = v4, in1 = v5, in2 = v6; + FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19; + FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23; + + Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B; + + __ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size)); + + __ ushr(ind0, arrangement, in0, 2); + + __ ushr(ind1, arrangement, in1, 2); + __ shl(in0, arrangement, in0, 6); + __ orr(ind1, arrangement, ind1, in0); + __ ushr(ind1, arrangement, ind1, 2); + + __ ushr(ind2, arrangement, in2, 4); + __ shl(in1, arrangement, in1, 4); + __ orr(ind2, arrangement, in1, ind2); + __ ushr(ind2, arrangement, ind2, 2); + + __ shl(ind3, arrangement, in2, 2); + __ ushr(ind3, arrangement, ind3, 2); + + __ tbl(out0, arrangement, codec, 4, ind0); + __ tbl(out1, arrangement, codec, 4, ind1); + __ tbl(out2, arrangement, codec, 4, ind2); + __ tbl(out3, arrangement, codec, 4, ind3); + + __ st4(out0, out1, out2, out3, arrangement, __ post(dst, 4 * size)); + } + + /** + * Generates the encodeBlock stub: encodes the source bytes between + * src_offset and src_end into Base64 characters written at + * dest_start + dest_offset. Returns the entry address of the stub. + * + * Arguments: + * + * Input: + * c_rarg0 - src_start + * c_rarg1 - src_offset + * c_rarg2 - src_end (end offset; length is computed as src_end - src_offset) + * c_rarg3 - dest_start + * c_rarg4 - dest_offset + * c_rarg5 - isURL (zero selects toBase64, non-zero selects toBase64URL) + * + */ + address generate_base64_encodeBlock() { + + static const char toBase64[64] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' + }; + + static const char toBase64URL[64] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 
'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "encodeBlock"); + address start = __ pc(); + + Register src = c_rarg0; // source array + Register soff = c_rarg1; // source start offset + Register send = c_rarg2; // source end offset + Register dst = c_rarg3; // dest array + Register doff = c_rarg4; // position for writing to dest array + Register isURL = c_rarg5; // Base64 or URL character set + + // c_rarg6 and c_rarg7 are free to use as temps + Register codec = c_rarg6; + Register length = c_rarg7; + + Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit; + + __ add(src, src, soff); + __ add(dst, dst, doff); + __ sub(length, send, soff); + + // load the codec base address + __ lea(codec, ExternalAddress((address) toBase64)); + __ cbz(isURL, ProcessData); + __ lea(codec, ExternalAddress((address) toBase64URL)); + + __ BIND(ProcessData); + + // too short to form a SIMD loop, fall back to the scalar 3-byte loop + __ cmp(length, (u1)24); + __ br(Assembler::LT, Process3B); + + // load the 64-byte codec table into v0-v3 for the tbl lookups + __ ld1(v0, v1, v2, v3, __ T16B, Address(codec)); + + // main loop: 48 source bytes per round while at least 48 remain + __ BIND(Process48B); + __ cmp(length, (u1)48); + __ br(Assembler::LT, Process24B); + generate_base64_encode_simdround(src, dst, v0, 16); + __ sub(length, length, 48); + __ b(Process48B); + + // at most one 24-byte round for what is left + __ BIND(Process24B); + __ cmp(length, (u1)24); + __ br(Assembler::LT, SIMDExit); + generate_base64_encode_simdround(src, dst, v0, 8); + __ sub(length, length, 24); + + __ BIND(SIMDExit); + __ cbz(length, Exit); + + // NOTE(review): this loop ends only when length reaches exactly 0, + // so it appears to rely on the caller passing a length that is a + // positive multiple of 3 - confirm against the Java caller + __ BIND(Process3B); + // 3 src bytes, 24 bits, packed into r12 with the first byte in bits 16..23 + __ ldrb(r10, __ post(src, 1)); + __ ldrb(r11, __ post(src, 1)); + __ ldrb(r12, __ post(src, 1)); + __ orrw(r11, r11, r10, Assembler::LSL, 8); + __ orrw(r12, r12, r11, Assembler::LSL, 8); + // codec index: four 6-bit fields of r12, most significant first + __ ubfmw(r15, r12, 18, 23); + __ ubfmw(r14, r12, 12, 17); + __ ubfmw(r13, r12, 6, 11); + __ andw(r12, r12, 63); + // get 
the code based on the codec + __ ldrb(r15, Address(codec, r15, Address::uxtw(0))); + __ ldrb(r14, Address(codec, r14, Address::uxtw(0))); + __ ldrb(r13, Address(codec, r13, Address::uxtw(0))); + __ ldrb(r12, Address(codec, r12, Address::uxtw(0))); + __ strb(r15, __ post(dst, 1)); + __ strb(r14, __ post(dst, 1)); + __ strb(r13, __ post(dst, 1)); + __ strb(r12, __ post(dst, 1)); + __ sub(length, length, 3); + __ cbnz(length, Process3B); + + __ BIND(Exit); + __ ret(lr); + + return start; + } + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this // frame. Since we need to preserve callee-saved values (currently // only for C2, but done for C1 as well) we need a callee-saved oop
*** 5781,5790 **** --- 5925,5938 ---- // generate GHASH intrinsics code if (UseGHASHIntrinsics) { StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); } + if (UseBASE64Intrinsics) { + StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock(); + } + if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
< prev index next >