diff a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -2674,20 +2674,27 @@ #undef MSG #undef INSN -void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) + void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) { starti; assert(T == T8B || T == T16B, "invalid arrangement"); assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value"); f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21); rf(Vm, 16), f(0, 15), f(index, 14, 11); f(0, 10), rf(Vn, 5), rf(Vd, 0); } + void sve_inc(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { // Encodes an SVE increment of register Xdn; T is the element-size variant + starti; + assert(T != Q, "invalid size"); // Q is not accepted as an element size here + f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); // fixed opcode bits, then T in bits 23:22 + f(imm4 - 1, 19, 16), f(0b111000, 15, 10), f(pattern, 9, 5), rf(Xdn, 0); // imm4 is stored biased by one (imm4 - 1) in bits 19:16; pattern goes in bits 9:5 + } + Assembler(CodeBuffer* code) : AbstractAssembler(code) { } virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, diff a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -97,10 +97,13 @@ "Use simpliest and shortest implementation for array equals") \ product(bool, AvoidUnalignedAccesses, false, \ "Avoid generating unaligned memory accesses") \ product(bool, UseLSE, false, \ "Use LSE instructions") \ + product(uint, UseSVE, 0, \ + "Highest supported SVE instruction set version") \ + range(0, 2) \ product(bool, UseBlockZeroing, true, \ "Use DC ZVA for block zeroing") \ product(intx, BlockZeroingLowLimit, 256, \ "Minimum size in bytes when block zeroing will be used") \ range(1, max_jint) \ diff a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp --- 
a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -5223,5 +5223,16 @@ // we only need a barrier post sync if (!is_pre) { membar(Assembler::AnyAny); } } + +void MacroAssembler::verify_sve_vector_length() { // Emits a runtime check that stops the VM when the SVE vector length differs from the one VM_Version recorded; uses rscratch1 as scratch + Label verify_ok; + assert(UseSVE > 0, "should only be used for SVE"); + movw(rscratch1, zr); // start from zero in rscratch1 + sve_inc(rscratch1, B); // presumably leaves the current vector length in bytes in rscratch1 - TODO confirm against the SVE INCB definition + subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length()); // compare with the initial length; only the condition flags are kept + br(EQ, verify_ok); // lengths match: skip the stop below + stop("Error: SVE vector length has changed since jvm startup"); + bind(verify_ok); +} diff a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -967,10 +967,11 @@ Label* L_fast_path = NULL, Label* L_slow_path = NULL); Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + void verify_sve_vector_length(); // Debugging // only if +VerifyOops void verify_oop(Register reg, const char* s = "broken oop"); diff a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1840,10 +1840,15 @@ __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); // Force this write out before the read below __ dmb(Assembler::ISH); + if (UseSVE > 0) { + // Make sure that jni code does not change SVE vector length. 
+ __ verify_sve_vector_length(); + } + // check for safepoint operation in progress and/or pending suspend requests Label safepoint_in_progress, safepoint_in_progress_done; { __ safepoint_poll_acquire(safepoint_in_progress); __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); diff a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1370,10 +1370,15 @@ // pushes change or anything else is added to the stack then the code in // interpreter_frame_result must also change. __ push(dtos); __ push(ltos); + if (UseSVE > 0) { + // Make sure that jni code does not change SVE vector length. + __ verify_sve_vector_length(); + } + // change thread state __ mov(rscratch1, _thread_in_native_trans); __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); __ stlrw(rscratch1, rscratch2); diff a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -29,16 +29,18 @@ #include "memory/resourceArea.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" #include "runtime/stubCodeGenerator.hpp" #include "runtime/vm_version.hpp" +#include "utilities/formatBuffer.hpp" #include "utilities/macros.hpp" #include OS_HEADER_INLINE(os) -#include <sys/auxv.h> #include <asm/hwcap.h> +#include <sys/auxv.h> // getauxval(AT_HWCAP / AT_HWCAP2) +#include <sys/prctl.h> // prctl(PR_SVE_GET_VL / PR_SVE_SET_VL) #ifndef HWCAP_AES #define HWCAP_AES (1<<3) #endif @@ -64,17 +66,32 @@ #ifndef HWCAP_SHA512 #define HWCAP_SHA512 (1 << 21) #endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 (1 << 1) +#endif + +#ifndef PR_SVE_GET_VL +// For old toolchains which do not have SVE related macros defined. 
+#define PR_SVE_SET_VL 50 +#define PR_SVE_GET_VL 51 +#endif + int VM_Version::_cpu; int VM_Version::_model; int VM_Version::_model2; int VM_Version::_variant; int VM_Version::_revision; int VM_Version::_stepping; bool VM_Version::_dcpop; +int VM_Version::_initial_sve_vector_length; VM_Version::PsrInfo VM_Version::_psr_info = { 0, }; static BufferBlob* stub_blob; static const int stub_size = 550; @@ -113,11 +130,10 @@ return start; } }; - void VM_Version::get_processor_features() { _supports_cx8 = true; _supports_atomic_getset4 = true; _supports_atomic_getadd4 = true; _supports_atomic_getset8 = true; @@ -164,10 +180,11 @@ warning("SoftwarePrefetchHintDistance must be -1, or a multiple of 8"); SoftwarePrefetchHintDistance &= ~7; } uint64_t auxv = getauxval(AT_HWCAP); + uint64_t auxv2 = getauxval(AT_HWCAP2); char buf[512]; _features = auxv; @@ -289,10 +306,12 @@ if (auxv & HWCAP_AES) strcat(buf, ", aes"); if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); if (auxv & HWCAP_SHA512) strcat(buf, ", sha512"); if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); + if (auxv & HWCAP_SVE) strcat(buf, ", sve"); + if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2"); _features_string = os::strdup(buf); if (FLAG_IS_DEFAULT(UseCRC32)) { UseCRC32 = (auxv & HWCAP_CRC32) != 0; @@ -428,10 +447,29 @@ } else if (UseBlockZeroing) { warning("DC ZVA is not available on this CPU"); FLAG_SET_DEFAULT(UseBlockZeroing, false); } + if (auxv & HWCAP_SVE) { + if (FLAG_IS_DEFAULT(UseSVE)) { + FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1); + } else if (UseSVE > 1 && !(auxv2 & HWCAP2_SVE2)) { + // SVE2 was requested explicitly but the CPU only reports SVE: fall back to SVE. + warning("SVE2 specified, but not supported on current CPU. Using SVE."); + FLAG_SET_DEFAULT(UseSVE, 1); + } + if (UseSVE > 0) { + // PR_SVE_GET_VL may OR flag bits such as PR_SVE_VL_INHERIT into its result; only the + // low 16 bits (PR_SVE_VL_LEN_MASK) hold the vector length in bytes. + int vl = prctl(PR_SVE_GET_VL); + _initial_sve_vector_length = (vl < 0) ? vl : (vl & 0xffff); + } + } else if (UseSVE > 0) { + warning("UseSVE specified, but not supported on current CPU. 
Disabling SVE."); + FLAG_SET_DEFAULT(UseSVE, 0); + } + // This machine allows unaligned memory accesses if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { FLAG_SET_DEFAULT(UseUnalignedAccesses, true); } @@ -462,10 +500,54 @@ } if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { UseMontgomerySquareIntrinsic = true; } + if (UseSVE > 0) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_sve_vector_length; + } else if (MaxVectorSize < 16) { + warning("SVE does not support vector length less than 16 bytes. Disabling SVE."); + UseSVE = 0; + } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) { + int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize); + _initial_sve_vector_length = new_vl; + // If MaxVectorSize is larger than system largest supported SVE vector length, above prctl() + // call will set task vector length to the system largest supported value. So, we also update + // MaxVectorSize to that largest supported value. + if (new_vl < 0) { + vm_exit_during_initialization( + err_msg("Current system does not support SVE vector length for MaxVectorSize: %d", + (int)MaxVectorSize)); + } else if (new_vl != MaxVectorSize) { + warning("Current system only supports max SVE vector length %d. 
Set MaxVectorSize to %d", + new_vl, new_vl); + } + MaxVectorSize = new_vl; + } else { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } + } + + if (UseSVE == 0) { // NEON + int min_vector_size = 8; + int max_vector_size = 16; + if (!FLAG_IS_DEFAULT(MaxVectorSize)) { + if (!is_power_of_2(MaxVectorSize)) { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } else if (MaxVectorSize < min_vector_size) { + warning("MaxVectorSize must be at least %i on this platform", min_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + } else if (MaxVectorSize > max_vector_size) { + warning("MaxVectorSize must be at most %i on this platform", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } + } else { + FLAG_SET_DEFAULT(MaxVectorSize, 16); + } + } + if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true; } if (FLAG_IS_DEFAULT(AlignVector)) { diff a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -39,10 +39,12 @@ static int _model2; static int _variant; static int _revision; static int _stepping; static bool _dcpop; + static int _initial_sve_vector_length; + struct PsrInfo { uint32_t dczid_el0; uint32_t ctr_el0; }; static PsrInfo _psr_info; @@ -104,10 +106,11 @@ static int cpu_model() { return _model; } static int cpu_model2() { return _model2; } static int cpu_variant() { return _variant; } static int cpu_revision() { return _revision; } static bool supports_dcpop() { return _dcpop; } + static int get_initial_sve_vector_length() { return _initial_sve_vector_length; } static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } static bool is_zva_enabled() { // Check the DZP bit (bit 4) of 
dczid_el0 is zero // and block size (bit 0~3) is not zero. diff a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java @@ -0,0 +1,128 @@ +/* +* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +* Copyright (c) 2020, Arm Ltd. All rights reserved. +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. +* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. 
+* +*/ + +/** + * @test + * + * @requires os.arch == "aarch64" & vm.compiler2.enabled + * @summary Verify VM SVE checking behavior + * @library /test/lib + * @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI + * + */ + +package compiler.c2.aarch64; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; + +public class TestSVEWithJNI { + static { + System.loadLibrary("TestSVEWithJNI"); + } + + static final int EXIT_CODE = 99; // exit status a child JVM uses once its "normal"/"abort" mode returns + // Returns a nonnegative value on success, or a negative value on error. + public static native int setVectorLength(int arg); + // Returns a nonnegative value on success, or a negative value on error. + public static native int getVectorLength(); + + public static final String MSG = "Current Vector Size: "; + public static void testNormal() { + int vlen = getVectorLength(); + System.out.println(MSG + vlen); + // Should be fine if no vector length changed. 
+ if (setVectorLength(vlen) < 0) { + throw new Error("Error in setting vector length."); + } + } + + public static void testAbort() { + int vlen = getVectorLength(); + if (vlen <= 16) { + throw new Error("Error: unsupported vector length."); + } + if (setVectorLength(16) < 0) { + throw new Error("Error: setting vector length failed."); + } + } + + public static ProcessBuilder createProcessBuilder(String [] args, String mode) { + List<String> vmopts = new ArrayList<>(); + String testjdkPath = System.getProperty("test.jdk"); + Collections.addAll(vmopts, "-Dtest.jdk=" + testjdkPath); + Collections.addAll(vmopts, args); + Collections.addAll(vmopts, TestSVEWithJNI.class.getName(), mode); + return ProcessTools.createJavaProcessBuilder(vmopts.toArray(new String[vmopts.size()])); + } + + public static void main(String [] args) throws Exception { + if (args.length == 0) { + int vlen = getVectorLength(); + if (vlen < 0) { + return; + } + String [][] testOpts = { + {"-Xint", "-XX:UseSVE=1"}, + {"-Xcomp", "-XX:UseSVE=1"}, + }; + ProcessBuilder pb; + OutputAnalyzer output; + for (String [] opts : testOpts) { + pb = createProcessBuilder(opts, "normal"); + output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(EXIT_CODE); + + pb = createProcessBuilder(opts, "abort"); + output = new OutputAnalyzer(pb.start()); + output.shouldNotHaveExitValue(EXIT_CODE); + output.shouldMatch("(error|Error|ERROR)"); + } + + // Verify MaxVectorSize + + // Any SVE architecture should support 128-bit vector size. + pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal"); + output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(EXIT_CODE); + output.shouldContain(MSG + 16); + + // An unsupported large vector size value. 
+ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal"); + output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(EXIT_CODE); + output.shouldContain("warning"); + } else if (args[0].equals("normal")) { + testNormal(); + System.exit(EXIT_CODE); + } else if (args[0].equals("abort")) { + testAbort(); + System.exit(EXIT_CODE); + } + } +} diff a/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c @@ -0,0 +1,68 @@ +/* +* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +* Copyright (c) 2020, Arm Ltd. All rights reserved. +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. +* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. +* +*/ + +#ifdef __aarch64__ + +#include +#include +#include +#include +#include +#include + +#ifndef PR_SVE_GET_VL +// For old toolchains which do not have SVE related macros defined. 
+#define PR_SVE_SET_VL 50 +#define PR_SVE_GET_VL 51 +#endif + +int get_current_thread_vl() { // Returns the raw result of prctl(PR_SVE_GET_VL) + return prctl(PR_SVE_GET_VL); +} + +int set_current_thread_vl(unsigned long arg) { // Passes arg straight to prctl(PR_SVE_SET_VL) and returns its result + return prctl(PR_SVE_SET_VL, arg); +} + +#ifdef __cplusplus +extern "C" { +#endif + +JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength +(JNIEnv * env, jclass clz, jint length) { // JNI entry point for TestSVEWithJNI.setVectorLength + return set_current_thread_vl(length); +} + +JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength +(JNIEnv *env, jclass clz) { // JNI entry point for TestSVEWithJNI.getVectorLength + return get_current_thread_vl(); +} + + +#ifdef __cplusplus +} +#endif + +#endif