# HG changeset patch
# User dongbo
# Date 1619091196 0
#      Thu Apr 22 11:33:16 2021 +0000
# Node ID 4b2ae0a47349fac898a9eb43b2c8783cd6dcbdc2
# Parent  ac57289f6124f7f9a76f1f796174fadbcea30676
8255625: AArch64: Implement Base64.encodeBlock accelerator/intrinsic
Reviewed-by: aph

diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -4730,6 +4730,150 @@
     return start;
   }
 
+  void generate_base64_encode_simdround(Register src, Register dst,
+        FloatRegister codec, u8 size) {
+    // Emits one SIMD round: 3 * size bytes read at src become 4 * size bytes written at dst.
+    FloatRegister in0  = v4,  in1  = v5,  in2  = v6;               // filled by ld3; in0/in1 are clobbered below
+    FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19;  // tbl results, written by st4
+    FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23;  // 6-bit indices into the codec table
+    // size == 16 selects T16B; any other value selects T8B.
+    Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B;
+    // Load byte 0/1/2 of every 3-byte group into in0/in1/in2 and post-increment src by 3 * size.
+    __ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size));
+    // ind0: top six bits of byte 0.
+    __ ushr(ind0, arrangement, in0,  2);
+    // ind1: low two bits of byte 0 followed by the top four bits of byte 1.
+    __ ushr(ind1, arrangement, in1,  2);
+    __ shl(in0,   arrangement, in0,  6);
+    __ orr(ind1,  arrangement, ind1, in0);
+    __ ushr(ind1, arrangement, ind1, 2);
+    // ind2: low four bits of byte 1 followed by the top two bits of byte 2.
+    __ ushr(ind2, arrangement, in2,  4);
+    __ shl(in1,   arrangement, in1,  4);
+    __ orr(ind2,  arrangement, in1,  ind2);
+    __ ushr(ind2, arrangement, ind2, 2);
+    // ind3: low six bits of byte 2 (shift left then right clears the top two bits).
+    __ shl(ind3,  arrangement, in2,  2);
+    __ ushr(ind3, arrangement, ind3, 2);
+    // Map each index to its output character via a 4-register table starting at codec.
+    __ tbl(out0,  arrangement, codec,  4, ind0);
+    __ tbl(out1,  arrangement, codec,  4, ind1);
+    __ tbl(out2,  arrangement, codec,  4, ind2);
+    __ tbl(out3,  arrangement, codec,  4, ind3);
+    // Store the four output registers and post-increment dst by 4 * size.
+    __ st4(out0,  out1, out2, out3, arrangement, __ post(dst, 4 * size));
+  }
+
+  /**
+   *  Generates the "encodeBlock" stub: encodes source bytes [c_rarg1, c_rarg2)
+   *  into the destination array, writing 4 output bytes per 3 input bytes.
+   *  Input:
+   *  c_rarg0   - src_start
+   *  c_rarg1   - src_offset
+   *  c_rarg2   - src_end (an end offset, not a length: byte count = c_rarg2 - c_rarg1)
+   *  c_rarg3   - dest_start
+   *  c_rarg4   - dest_offset
+   *  c_rarg5   - isURL (zero selects toBase64, non-zero selects toBase64URL)
+   *  NOTE(review): no padding/tail code is emitted; byte count is presumably a non-zero multiple of 3 - confirm against the caller.
+   */
+  address generate_base64_encodeBlock() {
+
+    static const char toBase64[64] = {  // standard alphabet: indices 62/63 are '+' and '/'
+      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+    };
+
+    static const char toBase64URL[64] = {  // URL alphabet: indices 62/63 are '-' and '_'
+      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
+    };
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "encodeBlock");
+    address start = __ pc();
+
+    Register src   = c_rarg0;  // source array
+    Register soff  = c_rarg1;  // source start offset
+    Register send  = c_rarg2;  // source end offset
+    Register dst   = c_rarg3;  // dest array
+    Register doff  = c_rarg4;  // position for writing to dest array
+    Register isURL = c_rarg5;  // Base64 or URL character set
+
+    // c_rarg6 and c_rarg7 are free to use as temps
+    Register codec  = c_rarg6;  // address of the selected 64-entry table
+    Register length = c_rarg7;  // source bytes still to encode
+
+    Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit;
+    // Advance src/dst by their offsets and compute the byte count as send - soff.
+    __ add(src, src, soff);
+    __ add(dst, dst, doff);
+    __ sub(length, send, soff);
+
+    // load the codec base address: toBase64 when isURL is zero, toBase64URL otherwise
+    __ lea(codec, ExternalAddress((address) toBase64));
+    __ cbz(isURL, ProcessData);
+    __ lea(codec, ExternalAddress((address) toBase64URL));
+
+    __ BIND(ProcessData);
+
+    // fewer than 24 bytes is too short for a SIMD round, fall back to the 3-byte scalar loop
+    __ cmp(length, (u1)24);
+    __ br(Assembler::LT, Process3B);
+    // Load the whole 64-byte codec table into v0-v3 for the tbl lookups.
+    __ ld1(v0, v1, v2, v3, __ T16B, Address(codec));
+    // Loop: 48 source bytes per round while at least 48 remain.
+    __ BIND(Process48B);
+    __ cmp(length, (u1)48);
+    __ br(Assembler::LT, Process24B);
+    generate_base64_encode_simdround(src, dst, v0, 16);
+    __ sub(length, length, 48);
+    __ b(Process48B);
+    // At most one further round of 24 source bytes.
+    __ BIND(Process24B);
+    __ cmp(length, (u1)24);
+    __ br(Assembler::LT, SIMDExit);
+    generate_base64_encode_simdround(src, dst, v0, 8);
+    __ sub(length, length, 24);
+
+    __ BIND(SIMDExit);
+    __ cbz(length, Exit);  // nothing left after the SIMD rounds
+    // Scalar loop: 3 source bytes in, 4 output bytes out per iteration; r10-r15 are scratch.
+    __ BIND(Process3B);
+    // 3 src bytes, 24 bits: r12 = (byte0 << 16) | (byte1 << 8) | byte2
+    __ ldrb(r10, __ post(src, 1));
+    __ ldrb(r11, __ post(src, 1));
+    __ ldrb(r12, __ post(src, 1));
+    __ orrw(r11, r11, r10, Assembler::LSL, 8);
+    __ orrw(r12, r12, r11, Assembler::LSL, 8);
+    // codec index: r15 = bits 23..18, r14 = bits 17..12, r13 = bits 11..6, r12 = bits 5..0
+    __ ubfmw(r15, r12, 18, 23);
+    __ ubfmw(r14, r12, 12, 17);
+    __ ubfmw(r13, r12, 6,  11);
+    __ andw(r12,  r12, 63);
+    // get the code based on the codec (byte load from codec + index)
+    __ ldrb(r15, Address(codec, r15, Address::uxtw(0)));
+    __ ldrb(r14, Address(codec, r14, Address::uxtw(0)));
+    __ ldrb(r13, Address(codec, r13, Address::uxtw(0)));
+    __ ldrb(r12, Address(codec, r12, Address::uxtw(0)));
+    __ strb(r15, __ post(dst, 1));
+    __ strb(r14, __ post(dst, 1));
+    __ strb(r13, __ post(dst, 1));
+    __ strb(r12, __ post(dst, 1));
+    __ sub(length, length, 3);
+    __ cbnz(length, Process3B);  // repeats until length is exactly zero
+
+    __ BIND(Exit);
+    __ ret(lr);
+
+    return start;
+  }
+
   // Continuation point for throwing of implicit exceptions that are
   // not handled in the current activation.
Fabricates an exception // oop and initiates normal exception dispatching in this @@ -5783,6 +5927,10 @@ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); } + if (UseBASE64Intrinsics) { + StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock(); + } + if (UseAESIntrinsics) { StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -374,6 +374,10 @@ FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } + if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { + UseBASE64Intrinsics = true; + } + if (is_zva_enabled()) { if (FLAG_IS_DEFAULT(UseBlockZeroing)) { FLAG_SET_DEFAULT(UseBlockZeroing, true); diff --git a/test/micro/org/openjdk/bench/java/util/Base64Encode.java b/test/micro/org/openjdk/bench/java/util/Base64Encode.java new file mode 100644 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/util/Base64Encode.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.micro.bench.java.util;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.Base64;
+import java.util.Random;
+import java.util.ArrayList;
+import java.util.concurrent.TimeUnit;
+/** Benchmarks Base64.Encoder.encode(byte[], byte[]) over TESTSIZE random inputs of 1..maxNumBytes bytes each. */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class Base64Encode {
+
+    private Base64.Encoder encoder;
+    private ArrayList<byte[]> unencoded;  // inputs built once in setup(); typed so the benchmark loop can iterate byte[]
+    private byte[] encoded;               // single output buffer reused for every input
+
+    private static final int TESTSIZE = 1000;  // number of inputs encoded per benchmark invocation
+
+    @Param({"1", "2", "3", "6", "7", "9", "10", "48", "512", "1000", "20000"})
+    private int maxNumBytes;  // upper bound (inclusive) on the length of each random input
+
+    @Setup
+    public void setup() {
+        Random r = new Random(1123);  // fixed seed, so every run sees the same inputs
+        // Size the output for the longest input: 4 output bytes per 3 input bytes, with slack.
+        int dstLen = ((maxNumBytes + 16) / 3) * 4;
+
+        encoder = Base64.getEncoder();
+        unencoded = new ArrayList<byte[]>();
+        encoded = new byte[dstLen];
+
+        for (int i = 0; i < TESTSIZE; i++) {
+            int srcLen = 1 + r.nextInt(maxNumBytes);  // 1..maxNumBytes
+            byte[] src = new byte[srcLen];
+            r.nextBytes(src);
+            unencoded.add(src);
+        }
+    }
+
+    @Benchmark
+    @OperationsPerInvocation(TESTSIZE)
+    public void testBase64Encode(Blackhole bh) {
+        for (byte[] s : unencoded) {
+            encoder.encode(s, encoded);
+            bh.consume(encoded);  // hand the buffer to the Blackhole after each encode
+        }
+    }
+}