--- old/doc/building.html 2018-09-17 10:29:14.910978100 -0400
+++ new/doc/building.html 2018-09-17 10:29:14.280941301 -0400
@@ -707,7 +707,6 @@

Additional architectures might be supported by Debian/Ubuntu Ports.

Building for ARM/aarch64

A common cross-compilation target is the ARM CPU. When building for ARM, it is useful to set the ABI profile. A number of pre-defined ABI profiles are available using --with-abi-profile: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK.
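For illustration only, a minimal cross-configure sketch for a hard-float ARM target, assuming an arm-linux-gnueabihf cross-toolchain (or devkit) is already installed; the --openjdk-target and --with-abi-profile values below also appear in the jib build profiles later in this patch:

    bash configure --openjdk-target=arm-linux-gnueabihf --with-abi-profile=arm-vfp-hflt --disable-warnings-as-errors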


The JDK contains two different ports for the aarch64 platform: the original aarch64 port from the AArch64 Port Project, and a 64-bit version of the Oracle-contributed ARM port. When targeting aarch64, the original aarch64 port is used by default. To select the Oracle ARM 64 port, use --with-cpu-port=arm64. Also pass the corresponding value (aarch64 or arm64) to --with-abi-profile to ensure a consistent build.
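When targeting aarch64 with the default port, a minimal configure sketch, assuming an aarch64-linux-gnu cross-toolchain (these are the flags used by the linux-aarch64 jib profile in this patch), could look like:

    bash configure --openjdk-target=aarch64-linux-gnu --with-freetype=bundled --disable-warnings-as-errors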

Verifying the Build

The build will end up in a directory named like build/linux-arm-normal-server-release.

Inside this build output directory, the images/jdk directory contains the newly built JDK for your target system.
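Since the result is cross-compiled, the binaries cannot be run directly on the build machine. A quick sanity check, using the example directory name above, is simply to list the image and confirm the launcher is present; running bin/java -version then has to happen on the target device (or under an emulator such as QEMU):

    ls build/linux-arm-normal-server-release/images/jdk/bin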

--- old/doc/building.md 2018-09-17 10:29:16.487070156 -0400 +++ new/doc/building.md 2018-09-17 10:29:15.853033123 -0400 @@ -1080,14 +1080,6 @@ armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK. -The JDK contains two different ports for the aarch64 platform, one is the -original aarch64 port from the [AArch64 Port Project]( -http://openjdk.java.net/projects/aarch64-port) and one is a 64-bit version of -the Oracle contributed ARM port. When targeting aarch64, by the default the -original aarch64 port is used. To select the Oracle ARM 64 port, use -`--with-cpu-port=arm64`. Also set the corresponding value (`aarch64` or -`arm64`) to --with-abi-profile, to ensure a consistent build. - ### Verifying the Build The build will end up in a directory named like --- old/make/autoconf/flags-cflags.m4 2018-09-17 10:29:18.059161978 -0400 +++ new/make/autoconf/flags-cflags.m4 2018-09-17 10:29:17.426125004 -0400 @@ -724,10 +724,6 @@ # -Wno-psabi to get rid of annoying "note: the mangling of 'va_list' has changed in GCC 4.4" $1_CFLAGS_CPU="-fsigned-char -Wno-psabi $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS -DJDK_ARCH_ABI_PROP_NAME='\"\$(JDK_ARCH_ABI_PROP_NAME)\"'" $1_CFLAGS_CPU_JVM="-DARM" - elif test "x$FLAGS_CPU" = xaarch64; then - if test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then - $1_CFLAGS_CPU_JVM="-fsigned-char -DARM" - fi elif test "x$FLAGS_CPU_ARCH" = xppc; then $1_CFLAGS_CPU_JVM="-minsert-sched-nops=regroup_exact -mno-multiple -mno-string" if test "x$FLAGS_CPU" = xppc64; then --- old/make/autoconf/flags-ldflags.m4 2018-09-17 10:29:19.641254384 -0400 +++ new/make/autoconf/flags-ldflags.m4 2018-09-17 10:29:19.008217410 -0400 @@ -163,10 +163,6 @@ elif test "x$OPENJDK_$1_CPU" = xarm; then $1_CPU_LDFLAGS_JVM_ONLY="${$1_CPU_LDFLAGS_JVM_ONLY} -fsigned-char" $1_CPU_LDFLAGS="$ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" - elif test "x$FLAGS_CPU" = xaarch64; then - if test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then - $1_CPU_LDFLAGS_JVM_ONLY="${$1_CPU_LDFLAGS_JVM_ONLY} -fsigned-char" - fi fi elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then --- old/make/autoconf/flags.m4 2018-09-17 10:29:21.204345681 -0400 +++ new/make/autoconf/flags.m4 2018-09-17 10:29:20.575308940 -0400 @@ -34,7 +34,7 @@ AC_DEFUN([FLAGS_SETUP_ABI_PROFILE], [ AC_ARG_WITH(abi-profile, [AS_HELP_STRING([--with-abi-profile], - [specify ABI profile for ARM builds (arm-vfp-sflt,arm-vfp-hflt,arm-sflt, armv5-vfp-sflt,armv6-vfp-hflt,arm64,aarch64) @<:@toolchain dependent@:>@ ])]) + [specify ABI profile for ARM builds (arm-vfp-sflt,arm-vfp-hflt,arm-sflt, armv5-vfp-sflt,armv6-vfp-hflt,aarch64) @<:@toolchain dependent@:>@ ])]) if test "x$with_abi_profile" != x; then if test "x$OPENJDK_TARGET_CPU" != xarm && \ @@ -61,10 +61,6 @@ elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv6-vfp-hflt; then ARM_FLOAT_TYPE=vfp-hflt ARM_ARCH_TYPE_FLAGS='-march=armv6 -marm' - elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm64; then - # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME - ARM_FLOAT_TYPE= - ARM_ARCH_TYPE_FLAGS= elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xaarch64; then # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME ARM_FLOAT_TYPE= --- old/make/autoconf/hotspot.m4 2018-09-17 10:29:22.768437035 -0400 +++ new/make/autoconf/hotspot.m4 2018-09-17 10:29:22.133399945 -0400 @@ -72,8 +72,6 @@ AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants], [JVM variants (separated by commas) to build (server,client,minimal,core,zero,custom) @<:@server@:>@])]) - 
SETUP_HOTSPOT_TARGET_CPU_PORT - if test "x$with_jvm_variants" = x; then with_jvm_variants="server" fi @@ -307,9 +305,6 @@ if test "x$OPENJDK_TARGET_CPU" = xarm; then HOTSPOT_TARGET_CPU=arm_32 HOTSPOT_TARGET_CPU_DEFINE="ARM32" - elif test "x$OPENJDK_TARGET_CPU" = xaarch64 && test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then - HOTSPOT_TARGET_CPU=arm_64 - HOTSPOT_TARGET_CPU_ARCH=arm fi # Verify that dependencies are met for explicitly set features. @@ -517,6 +512,9 @@ # Used for verification of Makefiles by check-jvm-feature AC_SUBST(VALID_JVM_FEATURES) + + # --with-cpu-port is no longer supported + BASIC_DEPRECATED_ARG_WITH(with-cpu-port) ]) ############################################################################### @@ -554,31 +552,6 @@ ]) ################################################################################ -# -# Specify which sources will be used to build the 64-bit ARM port -# -# --with-cpu-port=arm64 will use hotspot/src/cpu/arm -# --with-cpu-port=aarch64 will use hotspot/src/cpu/aarch64 -# -AC_DEFUN([SETUP_HOTSPOT_TARGET_CPU_PORT], -[ - AC_ARG_WITH(cpu-port, [AS_HELP_STRING([--with-cpu-port], - [specify sources to use for Hotspot 64-bit ARM port (arm64,aarch64) @<:@aarch64@:>@ ])]) - - if test "x$with_cpu_port" != x; then - if test "x$OPENJDK_TARGET_CPU" != xaarch64; then - AC_MSG_ERROR([--with-cpu-port only available on aarch64]) - fi - if test "x$with_cpu_port" != xarm64 && \ - test "x$with_cpu_port" != xaarch64; then - AC_MSG_ERROR([--with-cpu-port must specify arm64 or aarch64]) - fi - HOTSPOT_TARGET_CPU_PORT="$with_cpu_port" - fi -]) - - -################################################################################ # Check if gtest should be built # AC_DEFUN_ONCE([HOTSPOT_ENABLE_DISABLE_GTEST], --- old/make/conf/jib-profiles.js 2018-09-17 10:29:24.337528683 -0400 +++ new/make/conf/jib-profiles.js 2018-09-17 10:29:23.703491650 -0400 @@ -233,8 +233,7 @@ common.main_profile_names = [ "linux-x64", "linux-x86", "macosx-x64", "solaris-x64", "solaris-sparcv9", "windows-x64", "windows-x86", - "linux-aarch64", "linux-arm32", "linux-arm64", "linux-arm-vfp-hflt", - "linux-arm-vfp-hflt-dyn" + "linux-aarch64", "linux-arm32" ]; // These are the base setttings for all the main build profiles. @@ -440,20 +439,7 @@ dependencies: ["devkit", "build_devkit", "cups"], configure_args: [ "--openjdk-target=aarch64-linux-gnu", "--with-freetype=bundled", - "--disable-warnings-as-errors", "--with-cpu-port=aarch64", - ], - }, - - "linux-arm64": { - target_os: "linux", - target_cpu: "aarch64", - build_cpu: "x64", - dependencies: ["devkit", "build_devkit", "cups", "headless_stubs"], - configure_args: [ - "--with-cpu-port=arm64", - "--with-jvm-variants=server", - "--openjdk-target=aarch64-linux-gnu", - "--enable-headless-only" + "--disable-warnings-as-errors" ], }, @@ -467,30 +453,7 @@ "--with-abi-profile=arm-vfp-hflt", "--disable-warnings-as-errors" ], }, - - "linux-arm-vfp-hflt": { - target_os: "linux", - target_cpu: "arm", - build_cpu: "x64", - dependencies: ["devkit", "build_devkit", "cups"], - configure_args: [ - "--with-jvm-variants=minimal1,client", - "--with-x=" + input.get("devkit", "install_path") + "/arm-linux-gnueabihf/libc/usr/X11R6-PI", - "--with-fontconfig=" + input.get("devkit", "install_path") + "/arm-linux-gnueabihf/libc/usr/X11R6-PI", - "--openjdk-target=arm-linux-gnueabihf", - "--with-abi-profile=arm-vfp-hflt", - "--with-freetype=bundled" - ], - }, - - // Special version of the SE profile adjusted to be testable on arm64 hardware. 
- "linux-arm-vfp-hflt-dyn": { - configure_args: "--with-stdc++lib=dynamic" - } }; - // Let linux-arm-vfp-hflt-dyn inherit everything from linux-arm-vfp-hflt - profiles["linux-arm-vfp-hflt-dyn"] = concatObjects( - profiles["linux-arm-vfp-hflt-dyn"], profiles["linux-arm-vfp-hflt"]); // Add the base settings to all the main profiles common.main_profile_names.forEach(function (name) { @@ -618,15 +581,6 @@ }, "linux-arm32": { platform: "linux-arm32", - }, - "linux-arm64": { - platform: "linux-arm64-vfp-hflt", - }, - "linux-arm-vfp-hflt": { - platform: "linux-arm32-vfp-hflt", - }, - "linux-arm-vfp-hflt-dyn": { - platform: "linux-arm32-vfp-hflt-dyn", } } // Generate common artifacts for all main profiles @@ -842,16 +796,8 @@ solaris_x64: "SS12u4-Solaris11u1+1.0", solaris_sparcv9: "SS12u4-Solaris11u1+1.1", windows_x64: "VS2017-15.5.5+1.0", - linux_aarch64: (input.profile != null && input.profile.indexOf("arm64") >= 0 - ? "gcc-linaro-aarch64-linux-gnu-4.8-2013.11_linux+1.0" - : "gcc7.3.0-Fedora27+1.0"), - linux_arm: (input.profile != null && input.profile.indexOf("hflt") >= 0 - ? "gcc-linaro-arm-linux-gnueabihf-raspbian-2012.09-20120921_linux+1.0" - : (input.profile.indexOf("arm32") >= 0 - ? "gcc7.3.0-Fedora27+1.0" - : "arm-linaro-4.7+1.0" - ) - ) + linux_aarch64: "gcc7.3.0-Fedora27+1.0", + linux_arm: "gcc7.3.0-Fedora27+1.0" }; var devkit_platform = (input.target_cpu == "x86" --- old/make/hotspot/lib/CompileJvm.gmk 2018-09-17 10:29:25.923621322 -0400 +++ new/make/hotspot/lib/CompileJvm.gmk 2018-09-17 10:29:25.290584348 -0400 @@ -60,12 +60,6 @@ OPENJDK_TARGET_CPU_VM_VERSION := amd64 else ifeq ($(OPENJDK_TARGET_CPU), sparcv9) OPENJDK_TARGET_CPU_VM_VERSION := sparc -else ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm) - ifeq ($(OPENJDK_TARGET_CPU), aarch64) - # This sets the Oracle Aarch64 port to use arm64 - # while the original Aarch64 port uses aarch64 - OPENJDK_TARGET_CPU_VM_VERSION := arm64 - endif else OPENJDK_TARGET_CPU_VM_VERSION := $(OPENJDK_TARGET_CPU) endif --- old/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp 2018-09-17 10:29:27.487712677 -0400 +++ new/src/hotspot/cpu/arm/abstractInterpreter_arm.cpp 2018-09-17 10:29:26.852675586 -0400 @@ -38,19 +38,6 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) { int i = 0; switch (type) { -#ifdef AARCH64 - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : // fall through - case T_LONG : // fall through - case T_VOID : // fall through - case T_FLOAT : // fall through - case T_DOUBLE : i = 4; break; - case T_OBJECT : // fall through - case T_ARRAY : i = 5; break; -#else case T_VOID : i = 0; break; case T_BOOLEAN: i = 1; break; case T_CHAR : i = 2; break; @@ -62,7 +49,6 @@ case T_LONG : i = 7; break; case T_FLOAT : i = 8; break; case T_DOUBLE : i = 9; break; -#endif // AARCH64 default : ShouldNotReachHere(); } assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); @@ -71,7 +57,7 @@ // How much stack a method activation needs in words. int AbstractInterpreter::size_top_interpreter_activation(Method* method) { - const int stub_code = AARCH64_ONLY(24) NOT_AARCH64(12); // see generate_call_stub + const int stub_code = 12; // see generate_call_stub // Save space for one monitor to get into the interpreted method in case // the method is synchronized int monitor_size = method->is_synchronized() ? 
@@ -108,9 +94,6 @@ (moncount*frame::interpreter_frame_monitor_size()) + tempcount*Interpreter::stackElementWords + extra_args; -#ifdef AARCH64 - size = align_up(size, StackAlignmentInBytes/BytesPerWord); -#endif // AARCH64 return size; } @@ -146,65 +129,7 @@ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) // and sender_sp is (fp + sender_sp_offset*wordSize) -#ifdef AARCH64 - intptr_t* locals; - if (caller->is_interpreted_frame()) { - // attach locals to the expression stack of caller interpreter frame - locals = caller->interpreter_frame_tos_address() + caller_actual_parameters*Interpreter::stackElementWords - 1; - } else { - assert (is_bottom_frame, "should be"); - locals = interpreter_frame->fp() + frame::sender_sp_offset + method->max_locals() - 1; - } - - if (TraceDeoptimization) { - tty->print_cr("layout_activation:"); - - if (caller->is_entry_frame()) { - tty->print("entry "); - } - if (caller->is_compiled_frame()) { - tty->print("compiled "); - } - if (caller->is_interpreted_frame()) { - tty->print("interpreted "); - } - tty->print_cr("caller: sp=%p, unextended_sp=%p, fp=%p, pc=%p", caller->sp(), caller->unextended_sp(), caller->fp(), caller->pc()); - tty->print_cr("interpreter_frame: sp=%p, unextended_sp=%p, fp=%p, pc=%p", interpreter_frame->sp(), interpreter_frame->unextended_sp(), interpreter_frame->fp(), interpreter_frame->pc()); - tty->print_cr("method: max_locals = %d, size_of_parameters = %d", method->max_locals(), method->size_of_parameters()); - tty->print_cr("caller_actual_parameters = %d", caller_actual_parameters); - tty->print_cr("locals = %p", locals); - } - -#ifdef ASSERT - if (caller_actual_parameters != method->size_of_parameters()) { - assert(caller->is_interpreted_frame(), "adjusted caller_actual_parameters, but caller is not interpreter frame"); - Bytecode_invoke inv(caller->interpreter_frame_method(), caller->interpreter_frame_bci()); - - if (is_bottom_frame) { - assert(caller_actual_parameters == 0, "invalid adjusted caller_actual_parameters value for bottom frame"); - assert(inv.is_invokedynamic() || inv.is_invokehandle(), "adjusted caller_actual_parameters for bottom frame, but not invokedynamic/invokehandle"); - } else { - assert(caller_actual_parameters == method->size_of_parameters()+1, "invalid adjusted caller_actual_parameters value"); - assert(!inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name()), "adjusted caller_actual_parameters, but no member arg"); - } - } - if (caller->is_interpreted_frame()) { - intptr_t* locals_base = (locals - method->max_locals()*Interpreter::stackElementWords + 1); - locals_base = align_down(locals_base, StackAlignmentInBytes); - assert(interpreter_frame->sender_sp() <= locals_base, "interpreter-to-interpreter frame chaining"); - - } else if (caller->is_compiled_frame()) { - assert(locals + 1 <= caller->unextended_sp(), "compiled-to-interpreter frame chaining"); - - } else { - assert(caller->is_entry_frame(), "should be"); - assert(locals + 1 <= caller->fp(), "entry-to-interpreter frame chaining"); - } -#endif // ASSERT - -#else intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; -#endif // AARCH64 interpreter_frame->interpreter_frame_set_locals(locals); BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); @@ -215,44 +140,16 @@ intptr_t* stack_top = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - popframe_extra_args; -#ifdef AARCH64 - interpreter_frame->interpreter_frame_set_stack_top(stack_top); - - // 
We have to add extra reserved slots to max_stack. There are 3 users of the extra slots, - // none of which are at the same time, so we just need to make sure there is enough room - // for the biggest user: - // -reserved slot for exception handler - // -reserved slots for JSR292. Method::extra_stack_entries() is the size. - // -3 reserved slots so get_method_counters() can save some registers before call_VM(). - int max_stack = method->constMethod()->max_stack() + MAX2(3, Method::extra_stack_entries()); - intptr_t* extended_sp = (intptr_t*) monbot - - (max_stack * Interpreter::stackElementWords) - - popframe_extra_args; - extended_sp = align_down(extended_sp, StackAlignmentInBytes); - interpreter_frame->interpreter_frame_set_extended_sp(extended_sp); -#else interpreter_frame->interpreter_frame_set_last_sp(stack_top); -#endif // AARCH64 // All frames but the initial (oldest) interpreter frame we fill in have a // value for sender_sp that allows walking the stack but isn't // truly correct. Correct the value here. -#ifdef AARCH64 - if (caller->is_interpreted_frame()) { - intptr_t* sender_sp = align_down(caller->interpreter_frame_tos_address(), StackAlignmentInBytes); - interpreter_frame->set_interpreter_frame_sender_sp(sender_sp); - - } else { - // in case of non-interpreter caller sender_sp of the oldest frame is already - // set to valid value - } -#else if (extra_locals != 0 && interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); } -#endif // AARCH64 *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); --- old/src/hotspot/cpu/arm/arm.ad 2018-09-17 10:29:29.051804032 -0400 +++ new/src/hotspot/cpu/arm/arm.ad 2018-09-17 10:29:28.412766707 -0400 @@ -67,15 +67,10 @@ return MacroAssembler::_cache_fully_reachable(); } -#ifdef AARCH64 -#define ldr_32 ldr_w -#define str_32 str_w -#else #define ldr_32 ldr #define str_32 str #define tst_32 tst #define teq_32 teq -#endif #if 1 extern bool PrintOptoAssembly; #endif @@ -111,12 +106,7 @@ static int emit_deopt_handler(CodeBuffer& cbuf); static uint size_exception_handler() { -#ifdef AARCH64 - // ldr_literal; br; (pad); - return 3 * Assembler::InstructionSize + wordSize; -#else return ( 3 * 4 ); -#endif } @@ -205,9 +195,6 @@ const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask(); int Compile::ConstantTable::calculate_table_base_offset() const { -#ifdef AARCH64 - return 0; -#else int offset = -(size() / 2); // flds, fldd: 8-bit offset multiplied by 4: +/- 1024 // ldr, ldrb : 12-bit offset: +/- 4096 @@ -215,7 +202,6 @@ offset = Assembler::min_simm10(); } return offset; -#endif } bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } @@ -240,11 +226,7 @@ } uint MachConstantBaseNode::size(PhaseRegAlloc*) const { -#ifdef AARCH64 - return 5 * Assembler::InstructionSize; -#else return 8; -#endif } #ifndef PRODUCT @@ -262,12 +244,6 @@ for (int i = 0; i < OptoPrologueNops; i++) { st->print_cr("NOP"); st->print("\t"); } -#ifdef AARCH64 - if (OptoPrologueNops <= 0) { - st->print_cr("NOP\t! 
required for safe patching"); - st->print("\t"); - } -#endif size_t framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); @@ -298,11 +274,6 @@ for (int i = 0; i < OptoPrologueNops; i++) { __ nop(); } -#ifdef AARCH64 - if (OptoPrologueNops <= 0) { - __ nop(); // required for safe patching by patch_verified_entry() - } -#endif size_t framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); @@ -361,18 +332,8 @@ if (do_polling() && ra_->C->is_method_compilation()) { st->print("\n\t"); -#ifdef AARCH64 - if (MacroAssembler::page_reachable_from_cache(os::get_polling_page())) { - st->print("ADRP Rtemp, #PollAddr\t! Load Polling address\n\t"); - st->print("LDR ZR,[Rtemp + #PollAddr & 0xfff]\t!Poll for Safepointing"); - } else { - st->print("mov_slow Rtemp, #PollAddr\t! Load Polling address\n\t"); - st->print("LDR ZR,[Rtemp]\t!Poll for Safepointing"); - } -#else st->print("MOV Rtemp, #PollAddr\t! Load Polling address\n\t"); st->print("LDR Rtemp,[Rtemp]\t!Poll for Safepointing"); -#endif } } #endif @@ -390,36 +351,15 @@ // If this does safepoint polling, then do it here if (do_polling() && ra_->C->is_method_compilation()) { -#ifdef AARCH64 - if (false && MacroAssembler::page_reachable_from_cache(os::get_polling_page())) { -/* FIXME: TODO - __ relocate(relocInfo::xxx); - __ adrp(Rtemp, (intptr_t)os::get_polling_page()); - __ relocate(relocInfo::poll_return_type); - int offset = os::get_polling_page() & 0xfff; - __ ldr(ZR, Address(Rtemp + offset)); -*/ - } else { - __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference); - __ relocate(relocInfo::poll_return_type); - __ ldr(ZR, Address(Rtemp)); - } -#else // mov_slow here is usually one or two instruction __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference); __ relocate(relocInfo::poll_return_type); __ ldr(Rtemp, Address(Rtemp)); -#endif } } uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -#ifdef AARCH64 - // allow for added alignment nop from mov_address bind_literal - return MachNode::size(ra_) + 1 * Assembler::InstructionSize; -#else return MachNode::size(ra_); -#endif } int MachEpilogNode::reloc() const { @@ -451,16 +391,12 @@ } static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) { -#ifdef AARCH64 - return is_memoryHD(offset); -#else int rlo = Matcher::_regEncode[src_first]; int rhi = Matcher::_regEncode[src_second]; if (!((rlo&1)==0 && (rlo+1 == rhi))) { tty->print_cr("CAUGHT BAD LDRD/STRD"); } return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset); -#endif } uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, @@ -549,11 +485,6 @@ Matcher::regName[src_first]); #endif } -#ifdef AARCH64 - if (src_first+1 == src_second && dst_first+1 == dst_second) { - return size + 4; - } -#endif size += 4; } @@ -722,20 +653,12 @@ assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous"); assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported"); if (cbuf) { -#ifdef AARCH64 - __ fmov_dx(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); -#else __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second])); -#endif #ifndef PRODUCT } else if 
(!do_size) { if (size != 0) st->print("\n\t"); -#ifdef AARCH64 - st->print("FMOV_DX R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); -#else st->print("FMDRR R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second)); #endif -#endif } return size + 4; } else { @@ -759,20 +682,12 @@ assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported"); if (cbuf) { -#ifdef AARCH64 - __ fmov_xd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); -#else __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); -#endif #ifndef PRODUCT } else if (!do_size) { if (size != 0) st->print("\n\t"); -#ifdef AARCH64 - st->print("FMOV_XD R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); -#else st->print("FMRRD R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first)); #endif -#endif } return size + 4; } else { @@ -795,7 +710,6 @@ return size; // Self copy; no move assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); -#ifndef AARCH64 // Check for integer reg-reg copy. Hi bits are stuck up in the top // 32-bits of a 64-bit register, but are needed in low bits of another // register (else it's a hi-bits-to-hi-bits copy which should have @@ -852,7 +766,6 @@ } return size + 4; } -#endif Unimplemented(); return 0; // Mute compiler @@ -910,11 +823,7 @@ __ add(dst, SP, offset); } else { __ mov_slow(dst, offset); -#ifdef AARCH64 - __ add(dst, SP, dst, ex_lsl); -#else __ add(dst, SP, dst); -#endif } } @@ -926,11 +835,7 @@ //============================================================================= #ifndef PRODUCT -#ifdef AARCH64 -#define R_RTEMP "R_R16" -#else #define R_RTEMP "R_R12" -#endif void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { st->print_cr("\nUEP:"); if (UseCompressedClassPointers) { @@ -952,14 +857,7 @@ __ load_klass(Rtemp, receiver); __ cmp(Rtemp, iCache); -#ifdef AARCH64 - Label match; - __ b(match, eq); - __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); - __ bind(match); -#else __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne); -#endif } uint MachUEPNode::size(PhaseRegAlloc *ra_) const { @@ -1005,24 +903,12 @@ int offset = __ offset(); address deopt_pc = __ pc(); -#ifdef AARCH64 - // See LR saved by caller in sharedRuntime_arm.cpp - // see also hse1 ws - // see also LIR_Assembler::emit_deopt_handler - - __ raw_push(LR, LR); // preserve LR in both slots - __ mov_relative_address(LR, deopt_pc); - __ str(LR, Address(SP, 1 * wordSize)); // save deopt PC - // OK to kill LR, because deopt blob will restore it from SP[0] - __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, LR_tmp); -#else __ sub(SP, SP, wordSize); // make room for saved PC __ push(LR); // save LR that may be live when we get here __ mov_relative_address(LR, deopt_pc); __ str(LR, Address(SP, wordSize)); // save deopt PC __ pop(LR); // restore LR __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg); -#endif assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 
@@ -1073,21 +959,13 @@ case Op_AddVF: case Op_SubVF: case Op_MulVF: -#ifdef AARCH64 - return VM_Version::has_simd(); -#else return VM_Version::has_vfp() || VM_Version::has_simd(); -#endif case Op_AddVD: case Op_SubVD: case Op_MulVD: case Op_DivVF: case Op_DivVD: -#ifdef AARCH64 - return VM_Version::has_simd(); -#else return VM_Version::has_vfp(); -#endif } return true; // Per default match rules are supported. @@ -1158,11 +1036,7 @@ } const bool Matcher::convL2FSupported(void) { -#ifdef AARCH64 - return true; -#else return false; -#endif } // Is this branch offset short enough that a short branch can be used? @@ -1181,29 +1055,17 @@ const bool Matcher::isSimpleConstant64(jlong value) { // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -#ifdef AARCH64 - return (value == 0); -#else return false; -#endif } // No scaling for the parameter the ClearArray node. const bool Matcher::init_array_count_is_in_bytes = true; -#ifdef AARCH64 -const int Matcher::long_cmove_cost() { return 1; } -#else // Needs 2 CMOV's for longs. const int Matcher::long_cmove_cost() { return 2; } -#endif -#ifdef AARCH64 -const int Matcher::float_cmove_cost() { return 1; } -#else // CMOVF/CMOVD are expensive on ARM. const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } -#endif // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; @@ -1211,11 +1073,7 @@ // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? // FIXME: does this handle vector shifts as well? -#ifdef AARCH64 -const bool Matcher::need_masked_shift_count = false; -#else const bool Matcher::need_masked_shift_count = true; -#endif const bool Matcher::convi2l_type_required = true; @@ -1261,14 +1119,7 @@ // needed. Else we split the double into 2 integer pieces and move it // piece-by-piece. Only happens when passing doubles into C code as the // Java calling convention forces doubles to be aligned. -#ifdef AARCH64 -// On stack replacement support: -// We don't need Load[DL]_unaligned support, because interpreter stack -// has correct alignment -const bool Matcher::misaligned_doubles_ok = true; -#else const bool Matcher::misaligned_doubles_ok = false; -#endif // No-op on ARM. void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { @@ -1300,10 +1151,6 @@ // Registers not mentioned will be killed by the VM call in the trampoline, and // arguments in those registers not be available to the callee. 
bool Matcher::can_be_java_arg( int reg ) { -#ifdef AARCH64 - if (reg >= R_R0_num && reg < R_R8_num) return true; - if (reg >= R_V0_num && reg <= R_V7b_num && ((reg & 3) < 2)) return true; -#else if (reg == R_R0_num || reg == R_R1_num || reg == R_R2_num || @@ -1311,7 +1158,6 @@ if (reg >= R_S0_num && reg <= R_S13_num) return true; -#endif return false; } @@ -1454,44 +1300,14 @@ Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); assert(R8_ic_reg == Ricklass, "should be"); __ set_inst_mark(); -#ifdef AARCH64 -// TODO: see C1 LIR_Assembler::ic_call() - InlinedAddress oop_literal((address)Universe::non_oop_word()); - int offset = __ offset(); - int fixed_size = mov_oop_size * 4; - if (VM_Version::prefer_moves_over_load_literal()) { - uintptr_t val = (uintptr_t)Universe::non_oop_word(); - __ movz(R8_ic_reg, (val >> 0) & 0xffff, 0); - __ movk(R8_ic_reg, (val >> 16) & 0xffff, 16); - __ movk(R8_ic_reg, (val >> 32) & 0xffff, 32); - __ movk(R8_ic_reg, (val >> 48) & 0xffff, 48); - } else { - __ ldr_literal(R8_ic_reg, oop_literal); - } - assert(__ offset() - offset == fixed_size, "bad mov_oop size"); -#else __ movw(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff); __ movt(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16); -#endif address virtual_call_oop_addr = __ inst_mark(); // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine // who we intended to call. int method_index = resolved_method_index(cbuf); __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index)); emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none); -#ifdef AARCH64 - if (!VM_Version::prefer_moves_over_load_literal()) { - Label skip_literal; - __ b(skip_literal); - int off2 = __ offset(); - __ bind_literal(oop_literal); - if (__ offset() - off2 == wordSize) { - // no padding, so insert nop for worst-case sizing - __ nop(); - } - __ bind(skip_literal); - } -#endif %} enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{ @@ -1558,16 +1374,8 @@ // See if the lengths are different, and calculate min in str1_reg. // Stash diff in tmp2 in case we need it for a tie-breaker. 
__ subs_32(tmp2_reg, cnt1_reg, cnt2_reg); -#ifdef AARCH64 - Label Lskip; - __ _lsl_w(cnt1_reg, cnt1_reg, exact_log2(sizeof(jchar))); // scale the limit - __ b(Lskip, mi); - __ _lsl_w(cnt1_reg, cnt2_reg, exact_log2(sizeof(jchar))); // scale the limit - __ bind(Lskip); -#else __ mov(cnt1_reg, AsmOperand(cnt1_reg, lsl, exact_log2(sizeof(jchar)))); // scale the limit __ mov(cnt1_reg, AsmOperand(cnt2_reg, lsl, exact_log2(sizeof(jchar))), pl); // scale the limit -#endif // reallocate cnt1_reg, cnt2_reg, result_reg // Note: limit_reg holds the string length pre-scaled by 2 @@ -1717,16 +1525,6 @@ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); // return true if the same array -#ifdef AARCH64 - __ cmp(ary1_reg, ary2_reg); - __ b(Lequal, eq); - - __ mov(result_reg, 0); - - __ cbz(ary1_reg, Ldone); // not equal - - __ cbz(ary2_reg, Ldone); // not equal -#else __ teq(ary1_reg, ary2_reg); __ mov(result_reg, 1, eq); __ b(Ldone, eq); // equal @@ -1738,19 +1536,12 @@ __ tst(ary2_reg, ary2_reg); __ mov(result_reg, 0, eq); __ b(Ldone, eq); // not equal -#endif //load the lengths of arrays __ ldr_s32(tmp1_reg, Address(ary1_reg, length_offset)); // int __ ldr_s32(tmp2_reg, Address(ary2_reg, length_offset)); // int // return false if the two arrays are not equal length -#ifdef AARCH64 - __ cmp_w(tmp1_reg, tmp2_reg); - __ b(Ldone, ne); // not equal - - __ cbz_w(tmp1_reg, Lequal); // zero-length arrays are equal -#else __ teq_32(tmp1_reg, tmp2_reg); __ mov(result_reg, 0, ne); __ b(Ldone, ne); // not equal @@ -1758,7 +1549,6 @@ __ tst(tmp1_reg, tmp1_reg); __ mov(result_reg, 1, eq); __ b(Ldone, eq); // zero-length arrays are equal -#endif // load array addresses __ add(ary1_reg, ary1_reg, base_offset); @@ -1852,11 +1642,7 @@ sync_stack_slots(1 * VMRegImpl::slots_per_word); // Compiled code's Frame Pointer -#ifdef AARCH64 - frame_pointer(R_SP); -#else frame_pointer(R_R13); -#endif // Stack alignment requirement stack_alignment(StackAlignmentInBytes); @@ -1953,7 +1739,6 @@ interface(CONST_INTER); %} -#ifndef AARCH64 // Integer Immediate: offset for half and double word loads and stores operand immIHD() %{ predicate(is_memoryHD(n->get_int())); @@ -1972,7 +1757,6 @@ format %{ %} interface(CONST_INTER); %} -#endif // Valid scale values for addressing modes and shifts operand immU5() %{ @@ -2183,45 +1967,6 @@ interface(CONST_INTER); %} -#ifdef AARCH64 -// Long Immediate: for logical instruction -operand limmL() %{ - predicate(is_limmL(n->get_long())); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand limmLn() %{ - predicate(is_limmL(~n->get_long())); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: for arithmetic instruction -operand aimmL() %{ - predicate(is_aimm(n->get_long())); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand aimmLneg() %{ - predicate(is_aimm(-n->get_long())); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} -#endif // AARCH64 // Long Immediate: the value FF operand immL_FF() %{ @@ -2404,11 +2149,7 @@ match(R1RegI); match(R2RegI); match(R3RegI); -#ifdef AARCH64 - match(ZRRegI); -#else match(R12RegI); -#endif format %{ %} interface(REG_INTER); @@ -2446,49 +2187,6 @@ interface(REG_INTER); %} -#ifdef AARCH64 -// Like sp_ptr_reg, but exclude regs (Aarch64 SP) that can't be -// stored directly. Includes ZR, so can't be used as a destination. 
-operand store_ptr_RegP() %{ - constraint(ALLOC_IN_RC(store_ptr_reg)); - match(RegP); - match(iRegP); - match(ZRRegP); - - format %{ %} - interface(REG_INTER); -%} - -operand store_RegI() %{ - constraint(ALLOC_IN_RC(store_reg)); - match(RegI); - match(iRegI); - match(ZRRegI); - - format %{ %} - interface(REG_INTER); -%} - -operand store_RegL() %{ - constraint(ALLOC_IN_RC(store_ptr_reg)); - match(RegL); - match(iRegL); - match(ZRRegL); - - format %{ %} - interface(REG_INTER); -%} - -operand store_RegN() %{ - constraint(ALLOC_IN_RC(store_reg)); - match(RegN); - match(iRegN); - match(ZRRegN); - - format %{ %} - interface(REG_INTER); -%} -#endif operand R0RegP() %{ constraint(ALLOC_IN_RC(R0_regP)); @@ -2578,7 +2276,6 @@ interface(REG_INTER); %} -#ifndef AARCH64 operand R12RegI() %{ constraint(ALLOC_IN_RC(R12_regI)); match(iRegI); @@ -2586,18 +2283,13 @@ format %{ %} interface(REG_INTER); %} -#endif // Long Register operand iRegL() %{ constraint(ALLOC_IN_RC(long_reg)); match(RegL); -#ifdef AARCH64 - match(iRegLd); -#else match(R0R1RegL); match(R2R3RegL); -#endif //match(iRegLex); format %{ %} @@ -2612,7 +2304,6 @@ interface(REG_INTER); %} -#ifndef AARCH64 // first long arg, or return value operand R0R1RegL() %{ constraint(ALLOC_IN_RC(R0R1_regL)); @@ -2629,7 +2320,6 @@ format %{ %} interface(REG_INTER); %} -#endif // Condition Code Flag Register operand flagsReg() %{ @@ -2671,7 +2361,6 @@ %} // Condition Code Register, long comparisons. -#ifndef AARCH64 operand flagsRegL_LTGE() %{ constraint(ALLOC_IN_RC(int_flags)); match(RegFlags); @@ -2719,7 +2408,6 @@ format %{ "apsr_UL_LEGT" %} interface(REG_INTER); %} -#endif // Condition Code Register, floating comparisons, unordered same as "less". operand flagsRegF() %{ @@ -2800,113 +2488,12 @@ format %{ "[$reg]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp(0x0); %} %} -#ifdef AARCH64 -// Indirect with scaled*1 uimm12 offset -operand indOffsetU12ScaleB(sp_ptr_RegP reg, immUL12 offset) %{ - constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(AddP reg offset); - - op_cost(100); - format %{ "[$reg + $offset]" %} - interface(MEMORY_INTER) %{ - base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else - index(0xf); // PC => no index -#endif - scale(0x0); - disp($offset); - %} -%} - -// Indirect with scaled*2 uimm12 offset -operand indOffsetU12ScaleS(sp_ptr_RegP reg, immUL12x2 offset) %{ - constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(AddP reg offset); - - op_cost(100); - format %{ "[$reg + $offset]" %} - interface(MEMORY_INTER) %{ - base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else - index(0xf); // PC => no index -#endif - scale(0x0); - disp($offset); - %} -%} - -// Indirect with scaled*4 uimm12 offset -operand indOffsetU12ScaleI(sp_ptr_RegP reg, immUL12x4 offset) %{ - constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(AddP reg offset); - - op_cost(100); - format %{ "[$reg + $offset]" %} - interface(MEMORY_INTER) %{ - base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else - index(0xf); // PC => no index -#endif - scale(0x0); - disp($offset); - %} -%} - -// Indirect with scaled*8 uimm12 offset -operand indOffsetU12ScaleL(sp_ptr_RegP reg, immUL12x8 offset) %{ - constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(AddP reg offset); - - op_cost(100); - format %{ "[$reg + $offset]" %} - interface(MEMORY_INTER) %{ - base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else - index(0xf); // PC => no index -#endif - 
scale(0x0); - disp($offset); - %} -%} - -// Indirect with scaled*16 uimm12 offset -operand indOffsetU12ScaleQ(sp_ptr_RegP reg, immUL12x16 offset) %{ - constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(AddP reg offset); - - op_cost(100); - format %{ "[$reg + $offset]" %} - interface(MEMORY_INTER) %{ - base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else - index(0xf); // PC => no index -#endif - scale(0x0); - disp($offset); - %} -%} - -#else // ! AARCH64 // Indirect with Offset in ]-4096, 4096[ operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{ @@ -2917,11 +2504,7 @@ format %{ "[$reg + $offset]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp($offset); %} @@ -2936,11 +2519,7 @@ format %{ "[$reg + $offset]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp($offset); %} @@ -2955,11 +2534,7 @@ format %{ "[$reg + $offset]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp($offset); %} @@ -2974,11 +2549,7 @@ format %{ "[$reg + $offset]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp($offset); %} @@ -2993,16 +2564,11 @@ format %{ "[$reg + $offset]" %} interface(MEMORY_INTER) %{ base($reg); -#ifdef AARCH64 - index(0xff); // 0xff => no index -#else index(0xf); // PC => no index -#endif scale(0x0); disp($offset); %} %} -#endif // !AARCH64 // Indirect with Register Index operand indIndex(iRegP addr, iRegX index) %{ @@ -3019,9 +2585,8 @@ %} %} -#ifdef AARCH64 // Indirect Memory Times Scale Plus Index Register -operand indIndexScaleS(iRegP addr, iRegX index, immI_1 scale) %{ +operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{ constraint(ALLOC_IN_RC(ptr_reg)); match(AddP addr (LShiftX index scale)); @@ -3035,130 +2600,9 @@ %} %} -// Indirect Memory Times Scale Plus 32-bit Index Register -operand indIndexIScaleS(iRegP addr, iRegI index, immI_1 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX (ConvI2L index) scale)); - - op_cost(100); - format %{"[$addr + $index.w << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x7fffffff); // sxtw - %} -%} - -// Indirect Memory Times Scale Plus Index Register -operand indIndexScaleI(iRegP addr, iRegX index, immI_2 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX index scale)); - - op_cost(100); - format %{"[$addr + $index << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x0); - %} -%} - -// Indirect Memory Times Scale Plus 32-bit Index Register -operand indIndexIScaleI(iRegP addr, iRegI index, immI_2 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX (ConvI2L index) scale)); - - op_cost(100); - format %{"[$addr + $index.w << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x7fffffff); // sxtw - %} -%} - -// Indirect Memory Times Scale Plus Index Register -operand indIndexScaleL(iRegP addr, iRegX index, immI_3 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX index scale)); - - op_cost(100); - format %{"[$addr + $index << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - 
index($index); - scale($scale); - disp(0x0); - %} -%} - -// Indirect Memory Times Scale Plus 32-bit Index Register -operand indIndexIScaleL(iRegP addr, iRegI index, immI_3 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX (ConvI2L index) scale)); - - op_cost(100); - format %{"[$addr + $index.w << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x7fffffff); // sxtw - %} -%} - -// Indirect Memory Times Scale Plus Index Register -operand indIndexScaleQ(iRegP addr, iRegX index, immI_4 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX index scale)); - - op_cost(100); - format %{"[$addr + $index << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x0); - %} -%} - -// Indirect Memory Times Scale Plus 32-bit Index Register -operand indIndexIScaleQ(iRegP addr, iRegI index, immI_4 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX (ConvI2L index) scale)); - - op_cost(100); - format %{"[$addr + $index.w << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x7fffffff); // sxtw - %} -%} -#else -// Indirect Memory Times Scale Plus Index Register -operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{ - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP addr (LShiftX index scale)); - - op_cost(100); - format %{"[$addr + $index << $scale]" %} - interface(MEMORY_INTER) %{ - base($addr); - index($index); - scale($scale); - disp(0x0); - %} -%} -#endif - -// Operands for expressing Control Flow -// NOTE: Label is a predefined operand which should not be redefined in -// the AD file. It is generically handled within the ADLC. +// Operands for expressing Control Flow +// NOTE: Label is a predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC. //----------Conditional Branch Operands---------------------------------------- // Comparison Op - This is the operation of the comparison, and is limited to @@ -3312,29 +2756,6 @@ // instructions for every form of operand when the instruction accepts // multiple operand types with the same basic encoding and format. The classic // case of this is memory operands. 
-#ifdef AARCH64 -opclass memoryB(indirect, indIndex, indOffsetU12ScaleB); -opclass memoryS(indirect, indIndex, indIndexScaleS, indIndexIScaleS, indOffsetU12ScaleS); -opclass memoryI(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI); -opclass memoryL(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); -opclass memoryP(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); -opclass memoryQ(indirect, indIndex, indIndexScaleQ, indIndexIScaleQ, indOffsetU12ScaleQ); -opclass memoryF(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI); -opclass memoryD(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); - -opclass memoryScaledS(indIndexScaleS, indIndexIScaleS); -opclass memoryScaledI(indIndexScaleI, indIndexIScaleI); -opclass memoryScaledL(indIndexScaleL, indIndexIScaleL); -opclass memoryScaledP(indIndexScaleL, indIndexIScaleL); -opclass memoryScaledQ(indIndexScaleQ, indIndexIScaleQ); -opclass memoryScaledF(indIndexScaleI, indIndexIScaleI); -opclass memoryScaledD(indIndexScaleL, indIndexIScaleL); -// when ldrex/strex is used: -opclass memoryex ( indirect ); -opclass indIndexMemory( indIndex ); -opclass memoryvld ( indirect /* , write back mode not implemented */ ); - -#else opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale ); opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale ); @@ -3354,7 +2775,6 @@ opclass indIndexMemory( indIndex ); opclass memorylong ( indirect, indOffset12x2 ); opclass memoryvld ( indirect /* , write back mode not implemented */ ); -#endif //----------PIPELINE----------------------------------------------------------- pipeline %{ @@ -4163,7 +3583,6 @@ size(4); format %{ "LDRSB $dst,$mem\t! byte -> int" %} ins_encode %{ - // High 32 bits are harmlessly set on Aarch64 __ ldrsb($dst$$Register, $mem$$Address); %} ins_pipe(iload_mask_mem); @@ -4174,13 +3593,6 @@ match(Set dst (ConvI2L (LoadB mem))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRSB $dst,$mem\t! byte -> long" %} - ins_encode %{ - __ ldrsb($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t" "ASR $dst.hi,$dst.lo,31" %} @@ -4188,7 +3600,6 @@ __ ldrsb($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); %} -#endif ins_pipe(iload_mask_mem); %} @@ -4210,13 +3621,6 @@ match(Set dst (ConvI2L (LoadUB mem))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRB $dst,$mem\t! ubyte -> long" %} - ins_encode %{ - __ ldrb($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t" "MOV $dst.hi,0" %} @@ -4224,7 +3628,6 @@ __ ldrb($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mem); %} @@ -4232,16 +3635,6 @@ instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{ match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); -#ifdef AARCH64 - ins_cost(MEMORY_REF_COST + DEFAULT_COST); - size(8); - format %{ "LDRB $dst,$mem\t! ubyte -> long\n\t" - "AND $dst,$dst,$mask" %} - ins_encode %{ - __ ldrb($dst$$Register, $mem$$Address); - __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); - %} -#else ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); size(12); format %{ "LDRB $dst.lo,$mem\t! 
ubyte -> long\n\t" @@ -4252,29 +3645,10 @@ __ mov($dst$$Register->successor(), 0); __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); %} -#endif ins_pipe(iload_mem); %} // Load Short (16bit signed) -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadS (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "LDRSH $dst,$mem+$off\t! short temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldrsh($dst$$Register, nmem); - %} - ins_pipe(iload_mask_mem); -%} -#endif instruct loadS(iRegI dst, memoryS mem) %{ match(Set dst (LoadS mem)); @@ -4297,7 +3671,6 @@ format %{ "LDRSB $dst,$mem\t! short -> byte" %} ins_encode %{ - // High 32 bits are harmlessly set on Aarch64 __ ldrsb($dst$$Register, $mem$$Address); %} ins_pipe(iload_mask_mem); @@ -4308,13 +3681,6 @@ match(Set dst (ConvI2L (LoadS mem))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRSH $dst,$mem\t! short -> long" %} - ins_encode %{ - __ ldrsh($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t" "ASR $dst.hi,$dst.lo,31" %} @@ -4322,30 +3688,11 @@ __ ldrsh($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); %} -#endif ins_pipe(iload_mask_mem); %} // Load Unsigned Short/Char (16bit UNsigned) -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadUSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadUS (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "LDRH $dst,$mem+$off\t! ushort/char temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldrh($dst$$Register, nmem); - %} - ins_pipe(iload_mem); -%} -#endif instruct loadUS(iRegI dst, memoryS mem) %{ match(Set dst (LoadUS mem)); @@ -4377,13 +3724,6 @@ match(Set dst (ConvI2L (LoadUS mem))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRH $dst,$mem\t! short -> long" %} - ins_encode %{ - __ ldrh($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRH $dst.lo,$mem\t! short -> long\n\t" "MOV $dst.hi, 0" %} @@ -4391,7 +3731,6 @@ __ ldrh($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mem); %} @@ -4400,13 +3739,6 @@ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRB $dst,$mem" %} - ins_encode %{ - __ ldrb($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRB $dst.lo,$mem\t! 
\n\t" "MOV $dst.hi, 0" %} @@ -4414,24 +3746,12 @@ __ ldrb($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mem); %} // Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); -#ifdef AARCH64 - ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST); - - size(8); - format %{ "LDRH $dst,$mem\t! ushort/char & mask -> long\n\t" - "AND $dst,$dst,$mask" %} - ins_encode %{ - __ ldrh($dst$$Register, $mem$$Address); - __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant); - %} -#else ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); size(12); @@ -4443,30 +3763,11 @@ __ mov($dst$$Register->successor(), 0); __ andr($dst$$Register, $dst$$Register, $mask$$constant); %} -#endif ins_pipe(iload_mem); %} // Load Integer -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadIoff(iRegI dst, memoryScaledI mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadI (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "ldr_s32 $dst,$mem+$off\t! int temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldr_s32($dst$$Register, nmem); - %} - ins_pipe(iload_mem); -%} -#endif instruct loadI(iRegI dst, memoryI mem) %{ match(Set dst (LoadI mem)); @@ -4537,15 +3838,6 @@ // Load Integer into a Long Register instruct loadI2L(iRegL dst, memoryI mem) %{ match(Set dst (ConvI2L (LoadI mem))); -#ifdef AARCH64 - ins_cost(MEMORY_REF_COST); - - size(4); - format %{ "LDRSW $dst.lo,$mem\t! int -> long" %} - ins_encode %{ - __ ldr_s32($dst$$Register, $mem$$Address); - %} -#else ins_cost(MEMORY_REF_COST); size(8); @@ -4555,22 +3847,12 @@ __ ldr($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); %} -#endif ins_pipe(iload_mask_mem); %} // Load Integer with mask 0xFF into a Long Register instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); -#ifdef AARCH64 - ins_cost(MEMORY_REF_COST); - - size(4); - format %{ "LDRB $dst.lo,$mem\t! int & 0xFF -> long" %} - ins_encode %{ - __ ldrb($dst$$Register, $mem$$Address); - %} -#else ins_cost(MEMORY_REF_COST); size(8); @@ -4580,7 +3862,6 @@ __ ldrb($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mem); %} @@ -4589,13 +3870,6 @@ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 - size(4); - format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long" %} - ins_encode %{ - __ ldrh($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long\n\t" "MOV $dst.hi, 0" %} @@ -4603,27 +3877,9 @@ __ ldrh($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mask_mem); %} -#ifdef AARCH64 -// Load Integer with an immediate mask into a Long Register -instruct loadI2L_limmI(iRegL dst, memoryI mem, limmI mask) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST); - - size(8); - format %{ "LDRSW $dst,$mem\t! 
int -> long\n\t" - "AND $dst,$dst,$mask" %} - - ins_encode %{ - __ ldr_s32($dst$$Register, $mem$$Address); - __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant); - %} - ins_pipe(iload_mem); -%} -#else // Load Integer with a 31-bit immediate mask into a Long Register instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); @@ -4641,27 +3897,7 @@ %} ins_pipe(iload_mem); %} -#endif - -#ifdef AARCH64 -// Load Integer with mask into a Long Register -// FIXME: use signedRegI mask, remove tmp? -instruct loadI2L_immI(iRegL dst, memoryI mem, immI mask, iRegI tmp) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(TEMP dst, TEMP tmp); - ins_cost(MEMORY_REF_COST + 3*DEFAULT_COST); - format %{ "LDRSW $mem,$dst\t! int & 31-bit mask -> long\n\t" - "MOV_SLOW $tmp,$mask\n\t" - "AND $dst,$tmp,$dst" %} - ins_encode %{ - __ ldrsw($dst$$Register, $mem$$Address); - __ mov_slow($tmp$$Register, $mask$$constant); - __ andr($dst$$Register, $dst$$Register, $tmp$$Register); - %} - ins_pipe(iload_mem); -%} -#else // Load Integer with a 31-bit mask into a Long Register // FIXME: use iRegI mask, remove tmp? instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{ @@ -4682,20 +3918,12 @@ %} ins_pipe(iload_mem); %} -#endif // Load Unsigned Integer into a Long Register instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ins_cost(MEMORY_REF_COST); -#ifdef AARCH64 -//size(4); - format %{ "LDR_w $dst,$mem\t! uint -> long" %} - ins_encode %{ - __ ldr_w($dst$$Register, $mem$$Address); - %} -#else size(8); format %{ "LDR $dst.lo,$mem\t! uint -> long\n\t" "MOV $dst.hi,0" %} @@ -4703,37 +3931,14 @@ __ ldr($dst$$Register, $mem$$Address); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(iload_mem); %} // Load Long -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadLoff(iRegLd dst, memoryScaledL mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadL (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "LDR $dst,$mem+$off\t! long temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldr($dst$$Register, nmem); - %} - ins_pipe(iload_mem); -%} -#endif instruct loadL(iRegLd dst, memoryL mem ) %{ -#ifdef AARCH64 - // already atomic for Aarch64 -#else predicate(!((LoadLNode*)n)->require_atomic_access()); -#endif match(Set dst (LoadL mem)); effect(TEMP dst); ins_cost(MEMORY_REF_COST); @@ -4746,7 +3951,6 @@ ins_pipe(iload_mem); %} -#ifndef AARCH64 instruct loadL_2instr(iRegL dst, memorylong mem ) %{ predicate(!((LoadLNode*)n)->require_atomic_access()); match(Set dst (LoadL mem)); @@ -4822,7 +4026,6 @@ %} ins_pipe(iload_mem); %} -#endif // !AARCH64 // Load Range instruct loadRange(iRegI dst, memoryI mem) %{ @@ -4839,31 +4042,13 @@ // Load Pointer -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadPoff(iRegP dst, memoryScaledP mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadP (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - format %{ "LDR $dst,$mem+$off\t! 
ptr temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldr($dst$$Register, nmem); - %} - ins_pipe(iload_mem); -%} -#endif - -instruct loadP(iRegP dst, memoryP mem) %{ - match(Set dst (LoadP mem)); - ins_cost(MEMORY_REF_COST); - size(4); - - format %{ "LDR $dst,$mem\t! ptr" %} +instruct loadP(iRegP dst, memoryP mem) %{ + match(Set dst (LoadP mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDR $dst,$mem\t! ptr" %} ins_encode %{ __ ldr($dst$$Register, $mem$$Address); %} @@ -4950,24 +4135,6 @@ %} #endif -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadDoff(regD dst, memoryScaledD mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadD (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "ldr $dst,$mem+$off\t! double temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldr_d($dst$$FloatRegister, nmem); - %} - ins_pipe(floadD_mem); -%} -#endif instruct loadD(regD dst, memoryD mem) %{ match(Set dst (LoadD mem)); @@ -4983,7 +4150,6 @@ ins_pipe(floadD_mem); %} -#ifndef AARCH64 // Load Double - UNaligned instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{ match(Set dst (LoadD_unaligned mem)); @@ -4999,26 +4165,7 @@ %} ins_pipe(iload_mem); %} -#endif - -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct loadFoff(regF dst, memoryScaledF mem, aimmX off, iRegP tmp) %{ - match(Set dst (LoadF (AddP mem off))); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - format %{ "ldr $dst,$mem+$off\t! float temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ ldr_s($dst$$FloatRegister, nmem); - %} - ins_pipe(floadF_mem); -%} -#endif instruct loadF(regF dst, memoryF mem) %{ match(Set dst (LoadF mem)); @@ -5032,17 +4179,6 @@ ins_pipe(floadF_mem); %} -#ifdef AARCH64 -instruct load_limmI(iRegI dst, limmI src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST + 1); // + 1 because MOV is preferred - format %{ "ORR_w $dst, ZR, $src\t! 
int" %} - ins_encode %{ - __ orr_w($dst$$Register, ZR, (uintx)$src$$constant); - %} - ins_pipe(ialu_imm); -%} -#endif // // Load Constant instruct loadConI( iRegI dst, immI src ) %{ @@ -5065,7 +4201,6 @@ ins_pipe(ialu_imm); %} -#ifndef AARCH64 instruct loadConIMovn( iRegI dst, immIRotn src ) %{ match(Set dst src); size(4); @@ -5075,22 +4210,13 @@ %} ins_pipe(ialu_imm_n); %} -#endif instruct loadConI16( iRegI dst, immI16 src ) %{ match(Set dst src); size(4); -#ifdef AARCH64 - format %{ "MOVZ_w $dst, $src" %} -#else format %{ "MOVW $dst, $src" %} -#endif ins_encode %{ -#ifdef AARCH64 - __ mov_w($dst$$Register, $src$$constant); -#else __ movw($dst$$Register, $src$$constant); -#endif %} ins_pipe(ialu_imm_n); %} @@ -5124,80 +4250,6 @@ ins_pipe(loadConP_poll); %} -#ifdef AARCH64 -instruct loadConP0(iRegP dst, immP0 src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST); - format %{ "MOV $dst,ZR\t!ptr" %} - ins_encode %{ - __ mov($dst$$Register, ZR); - %} - ins_pipe(ialu_none); -%} - -instruct loadConN(iRegN dst, immN src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST * 3/2); - format %{ "SET $dst,$src\t! compressed ptr" %} - ins_encode %{ - Register dst = $dst$$Register; - // FIXME: use $constanttablebase? - __ set_narrow_oop(dst, (jobject)$src$$constant); - %} - ins_pipe(ialu_hi_lo_reg); -%} - -instruct loadConN0(iRegN dst, immN0 src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST); - format %{ "MOV $dst,ZR\t! compressed ptr" %} - ins_encode %{ - __ mov($dst$$Register, ZR); - %} - ins_pipe(ialu_none); -%} - -instruct loadConNKlass(iRegN dst, immNKlass src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST * 3/2); - format %{ "SET $dst,$src\t! compressed klass ptr" %} - ins_encode %{ - Register dst = $dst$$Register; - // FIXME: use $constanttablebase? - __ set_narrow_klass(dst, (Klass*)$src$$constant); - %} - ins_pipe(ialu_hi_lo_reg); -%} - -instruct load_limmL(iRegL dst, limmL src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST); - format %{ "ORR $dst, ZR, $src\t! long" %} - ins_encode %{ - __ orr($dst$$Register, ZR, (uintx)$src$$constant); - %} - ins_pipe(loadConL); -%} -instruct load_immLMov(iRegL dst, immLMov src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST); - format %{ "MOV $dst, $src\t! long" %} - ins_encode %{ - __ mov($dst$$Register, $src$$constant); - %} - ins_pipe(loadConL); -%} -instruct loadConL(iRegL dst, immL src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST * 4); // worst case - format %{ "mov_slow $dst, $src\t! long" %} - ins_encode %{ - // FIXME: use $constanttablebase? - __ mov_slow($dst$$Register, $src$$constant); - %} - ins_pipe(loadConL); -%} -#else instruct loadConL(iRegL dst, immL src) %{ match(Set dst src); ins_cost(DEFAULT_COST * 4); @@ -5223,7 +4275,6 @@ %} ins_pipe(ialu_imm); %} -#endif instruct loadConF_imm8(regF dst, imm8F src) %{ match(Set dst src); @@ -5238,25 +4289,6 @@ ins_pipe(loadConFD); // FIXME %} -#ifdef AARCH64 -instruct loadIConF(iRegI dst, immF src) %{ - match(Set dst src); - ins_cost(DEFAULT_COST * 2); - - format %{ "MOV_SLOW $dst, $src\t! loadIConF" %} - - ins_encode %{ - // FIXME revisit once 6961697 is in - union { - jfloat f; - int i; - } v; - v.f = $src$$constant; - __ mov_slow($dst$$Register, v.i); - %} - ins_pipe(ialu_imm); -%} -#endif instruct loadConF(regF dst, immF src, iRegI tmp) %{ match(Set dst src); @@ -5324,11 +4356,7 @@ format %{ "PLDW $mem\t! 
Prefetch allocation" %} ins_encode %{ -#ifdef AARCH64 - __ prfm(pstl1keep, $mem$$Address); -#else __ pldw($mem$$Address); -#endif %} ins_pipe(iload_mem); %} @@ -5341,11 +4369,7 @@ format %{ "PLD $mem\t! Prefetch allocation" %} ins_encode %{ -#ifdef AARCH64 - __ prfm(pstl1keep, $mem$$Address); -#else __ pld($mem$$Address); -#endif %} ins_pipe(iload_mem); %} @@ -5378,24 +4402,6 @@ // Store Char/Short -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeCoff(store_RegI src, memoryScaledS mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreC (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "STRH $src,$mem+$off\t! short temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ strh($src$$Register, nmem); - %} - ins_pipe(istore_mem_reg); -%} -#endif instruct storeC(memoryS mem, store_RegI src) %{ match(Set mem (StoreC mem src)); @@ -5411,24 +4417,6 @@ // Store Integer -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeIoff(store_RegI src, memoryScaledI mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreI (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "str_32 $src,$mem+$off\t! int temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str_32($src$$Register, nmem); - %} - ins_pipe(istore_mem_reg); -%} -#endif instruct storeI(memoryI mem, store_RegI src) %{ match(Set mem (StoreI mem src)); @@ -5444,31 +4432,9 @@ // Store Long -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeLoff(store_RegLd src, memoryScaledL mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreL (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "str_64 $src,$mem+$off\t! 
long temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str_64($src$$Register, nmem); - %} - ins_pipe(istore_mem_reg); -%} -#endif instruct storeL(memoryL mem, store_RegLd src) %{ -#ifdef AARCH64 - // already atomic for Aarch64 -#else predicate(!((StoreLNode*)n)->require_atomic_access()); -#endif match(Set mem (StoreL mem src)); ins_cost(MEMORY_REF_COST); @@ -5481,7 +4447,6 @@ ins_pipe(istore_mem_reg); %} -#ifndef AARCH64 instruct storeL_2instr(memorylong mem, iRegL src) %{ predicate(!((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); @@ -5514,9 +4479,7 @@ %} ins_pipe(istore_mem_reg); %} -#endif // !AARCH64 -#ifndef AARCH64 instruct storeL_volatile_fp(memoryD mem, iRegL src) %{ predicate(((StoreLNode*)n)->require_atomic_access()); match(Set mem (StoreL mem src)); @@ -5530,7 +4493,6 @@ %} ins_pipe(istore_mem_reg); %} -#endif #ifdef XXX // Move SP Pointer @@ -5552,60 +4514,12 @@ %} #endif -#ifdef AARCH64 -// FIXME -// Store SP Pointer -instruct storeSP(memoryP mem, SPRegP src, iRegP tmp) %{ - match(Set mem (StoreP mem src)); - predicate(_kids[1]->_leaf->is_Proj() && _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr); - // Multiple StoreP rules, different only in register mask. - // Matcher makes the last always valid. The others will - // only be valid if they cost less than the last valid - // rule. So cost(rule1) < cost(rule2) < cost(last) - // Unlike immediates, register constraints are not checked - // at match time. - ins_cost(MEMORY_REF_COST+DEFAULT_COST+4); - effect(TEMP tmp); - size(8); - - format %{ "MOV $tmp,$src\t! SP ptr\n\t" - "STR $tmp,$mem\t! SP ptr" %} - ins_encode %{ - assert($src$$Register == SP, "SP expected"); - __ mov($tmp$$Register, $src$$Register); - __ str($tmp$$Register, $mem$$Address); - %} - ins_pipe(istore_mem_spORreg); // FIXME -%} -#endif // AARCH64 // Store Pointer -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storePoff(store_ptr_RegP src, memoryScaledP mem, aimmX off, iRegP tmp) %{ - predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr); - match(Set mem (StoreP (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "STR $src,$mem+$off\t! ptr temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str($src$$Register, nmem); - %} - ins_pipe(istore_mem_reg); -%} -#endif instruct storeP(memoryP mem, store_ptr_RegP src) %{ match(Set mem (StoreP mem src)); -#ifdef AARCH64 - predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr); -#endif ins_cost(MEMORY_REF_COST); size(4); @@ -5616,42 +4530,10 @@ ins_pipe(istore_mem_spORreg); %} -#ifdef AARCH64 -// Store NULL Pointer -instruct storeP0(memoryP mem, immP0 src) %{ - match(Set mem (StoreP mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - - format %{ "STR ZR,$mem\t! 
ptr" %} - ins_encode %{ - __ str(ZR, $mem$$Address); - %} - ins_pipe(istore_mem_spORreg); -%} -#endif // AARCH64 #ifdef _LP64 // Store Compressed Pointer -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeNoff(store_RegN src, memoryScaledI mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreN (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "str_32 $src,$mem+$off\t! compressed ptr temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str_32($src$$Register, nmem); - %} - ins_pipe(istore_mem_reg); -%} -#endif instruct storeN(memoryI mem, store_RegN src) %{ match(Set mem (StoreN mem src)); @@ -5665,20 +4547,6 @@ ins_pipe(istore_mem_reg); %} -#ifdef AARCH64 -// Store NULL Pointer -instruct storeN0(memoryI mem, immN0 src) %{ - match(Set mem (StoreN mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - - format %{ "str_32 ZR,$mem\t! compressed ptr" %} - ins_encode %{ - __ str_32(ZR, $mem$$Address); - %} - ins_pipe(istore_mem_reg); -%} -#endif // Store Compressed Klass Pointer instruct storeNKlass(memoryI mem, store_RegN src) %{ @@ -5696,24 +4564,6 @@ // Store Double -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeDoff(regD src, memoryScaledD mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreD (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); - - format %{ "STR $src,$mem+$off\t! double temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str_d($src$$FloatRegister, nmem); - %} - ins_pipe(fstoreD_mem_reg); -%} -#endif instruct storeD(memoryD mem, regD src) %{ match(Set mem (StoreD mem src)); @@ -5729,132 +4579,26 @@ ins_pipe(fstoreD_mem_reg); %} -#ifdef AARCH64 -instruct movI2F(regF dst, iRegI src) %{ - match(Set dst src); - size(4); - format %{ "FMOV_sw $dst,$src\t! movI2F" %} - ins_encode %{ - __ fmov_sw($dst$$FloatRegister, $src$$Register); - %} - ins_pipe(ialu_reg); // FIXME -%} +// Store Float -instruct movF2I(iRegI dst, regF src) %{ - match(Set dst src); - size(4); - format %{ "FMOV_ws $dst,$src\t! movF2I" %} +instruct storeF( memoryF mem, regF src) %{ + match(Set mem (StoreF mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "FSTS $src,$mem" %} ins_encode %{ - __ fmov_ws($dst$$Register, $src$$FloatRegister); + __ str_float($src$$FloatRegister, $mem$$Address); %} - ins_pipe(ialu_reg); // FIXME + ins_pipe(fstoreF_mem_reg); %} -#endif -// Store Float -#ifdef AARCH64 -// XXX This variant shouldn't be necessary if 6217251 is implemented -instruct storeFoff(regF src, memoryScaledF mem, aimmX off, iRegP tmp) %{ - match(Set mem (StoreF (AddP mem off) src)); - ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free - effect(TEMP tmp); - size(4 * 2); +//----------MemBar Instructions----------------------------------------------- +// Memory barrier flavors - format %{ "str_s $src,$mem+$off\t! 
float temp=$tmp" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - __ add($tmp$$Register, base, $off$$constant); - Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - __ str_s($src$$FloatRegister, nmem); - %} - ins_pipe(fstoreF_mem_reg); -%} -#endif - -instruct storeF( memoryF mem, regF src) %{ - match(Set mem (StoreF mem src)); - ins_cost(MEMORY_REF_COST); - - size(4); - format %{ "FSTS $src,$mem" %} - ins_encode %{ - __ str_float($src$$FloatRegister, $mem$$Address); - %} - ins_pipe(fstoreF_mem_reg); -%} - -#ifdef AARCH64 -// Convert oop pointer into compressed form -instruct encodeHeapOop(iRegN dst, iRegP src, flagsReg ccr) %{ - predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); - match(Set dst (EncodeP src)); - effect(KILL ccr); - format %{ "encode_heap_oop $dst, $src" %} - ins_encode %{ - __ encode_heap_oop($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{ - predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); - match(Set dst (EncodeP src)); - format %{ "encode_heap_oop_not_null $dst, $src" %} - ins_encode %{ - __ encode_heap_oop_not_null($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct decodeHeapOop(iRegP dst, iRegN src, flagsReg ccr) %{ - predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull && - n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant); - match(Set dst (DecodeN src)); - effect(KILL ccr); - format %{ "decode_heap_oop $dst, $src" %} - ins_encode %{ - __ decode_heap_oop($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{ - predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull || - n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant); - match(Set dst (DecodeN src)); - format %{ "decode_heap_oop_not_null $dst, $src" %} - ins_encode %{ - __ decode_heap_oop_not_null($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct encodeKlass_not_null(iRegN dst, iRegP src) %{ - match(Set dst (EncodePKlass src)); - format %{ "encode_klass_not_null $dst, $src" %} - ins_encode %{ - __ encode_klass_not_null($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct decodeKlass_not_null(iRegP dst, iRegN src) %{ - match(Set dst (DecodeNKlass src)); - format %{ "decode_klass_not_null $dst, $src" %} - ins_encode %{ - __ decode_klass_not_null($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} -#endif // AARCH64 - -//----------MemBar Instructions----------------------------------------------- -// Memory barrier flavors - -// TODO: take advantage of Aarch64 load-acquire, store-release, etc // pattern-match out unnecessary membars instruct membar_storestore() %{ match(MemBarStoreStore); @@ -5950,53 +4694,6 @@ // %} -#ifdef AARCH64 -// 0 constant in register -instruct zrImmI0(ZRRegI dst, immI0 imm) %{ - match(Set dst imm); - size(0); - ins_cost(0); - - format %{ "! ZR (int 0)" %} - ins_encode( /*empty encoding*/ ); - ins_pipe(ialu_none); -%} - -// 0 constant in register -instruct zrImmL0(ZRRegL dst, immL0 imm) %{ - match(Set dst imm); - size(0); - ins_cost(0); - - format %{ "! ZR (long 0)" %} - ins_encode( /*empty encoding*/ ); - ins_pipe(ialu_none); -%} - -#ifdef XXX -// 0 constant in register -instruct zrImmN0(ZRRegN dst, immN0 imm) %{ - match(Set dst imm); - size(0); - ins_cost(0); - - format %{ "! 
ZR (compressed pointer NULL)" %} - ins_encode( /*empty encoding*/ ); - ins_pipe(ialu_none); -%} - -// 0 constant in register -instruct zrImmP0(ZRRegP dst, immP0 imm) %{ - match(Set dst imm); - size(0); - ins_cost(0); - - format %{ "! ZR (NULL)" %} - ins_encode( /*empty encoding*/ ); - ins_pipe(ialu_none); -%} -#endif -#endif // AARCH64 // Cast Index to Pointer for unsafe natives instruct castX2P(iRegX src, iRegP dst) %{ @@ -6024,7 +4721,6 @@ ins_pipe(ialu_reg); %} -#ifndef AARCH64 //----------Conditional Move--------------------------------------------------- // Conditional move instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{ @@ -6037,187 +4733,8 @@ %} ins_pipe(ialu_reg); %} -#endif - -#ifdef AARCH64 -instruct cmovI_reg3(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src1, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovL_reg3(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} -instruct cmovP_reg3(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src1, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} -instruct cmovN_reg3(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src1, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovIP_reg3(cmpOpP cmp, flagsRegP icc, iRegI dst, iRegI src1, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovLP_reg3(cmpOpP cmp, flagsRegP icc, iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovPP_reg3(cmpOpP cmp, flagsRegP icc, iRegP dst, iRegP src1, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovNP_reg3(cmpOpP cmp, flagsRegP icc, iRegN dst, iRegN src1, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
compressed ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovIU_reg3(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src1, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovLU_reg3(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovPU_reg3(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src1, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovNU_reg3(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src1, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovIZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src1, iRegI src2) %{ - match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovLZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovPZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src1, iRegP src2) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} - -instruct cmovNZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegN dst, iRegN src1, iRegN src2) %{ - match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
compressed ptr" %} - ins_encode %{ - __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} -#endif // AARCH64 - -#ifndef AARCH64 instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{ match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); ins_cost(140); @@ -6239,7 +4756,6 @@ %} ins_pipe(ialu_imm); %} -#endif instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); @@ -6252,20 +4768,7 @@ ins_pipe(ialu_reg); %} -#ifdef AARCH64 -instruct cmovL_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{ - match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); - ins_cost(150); - size(4); - format %{ "MOV$cmp $dst,$src\t! long" %} - ins_encode %{ - __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(ialu_reg); -%} -#endif -#ifndef AARCH64 instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); ins_cost(140); @@ -6287,7 +4790,6 @@ %} ins_pipe(ialu_imm); %} -#endif instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); @@ -6304,7 +4806,6 @@ ins_pipe(ialu_reg); %} -#ifndef AARCH64 instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || @@ -6334,7 +4835,6 @@ %} ins_pipe(ialu_imm); %} -#endif instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); @@ -6347,7 +4847,6 @@ ins_pipe(ialu_reg); %} -#ifndef AARCH64 instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); ins_cost(140); @@ -6369,7 +4868,6 @@ %} ins_pipe(ialu_imm); %} -#endif // Conditional move instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{ @@ -6387,17 +4885,9 @@ match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src))); ins_cost(140); size(4); -#ifdef AARCH64 - format %{ "MOV$cmp $dst,ZR" %} -#else format %{ "MOV$cmp $dst,$src" %} -#endif ins_encode %{ -#ifdef AARCH64 - __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); -#else __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); -#endif %} ins_pipe(ialu_imm); %} @@ -6444,160 +4934,45 @@ %} instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); - ins_cost(140); - - size(4); -#ifdef AARCH64 - format %{ "MOV$cmp $dst,ZR\t! ptr" %} -#else - format %{ "MOV$cmp $dst,$src\t! 
ptr" %} -#endif - ins_encode %{ -#ifdef AARCH64 - __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); -#else - __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); -#endif - %} - ins_pipe(ialu_imm); -%} - -instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); - predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || - _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || - _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || - _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); - ins_cost(140); - - size(4); -#ifdef AARCH64 - format %{ "MOV$cmp $dst,ZR\t! ptr" %} -#else - format %{ "MOV$cmp $dst,$src\t! ptr" %} -#endif - ins_encode %{ -#ifdef AARCH64 - __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); -#else - __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); -#endif - %} - ins_pipe(ialu_imm); -%} - -instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{ - match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); - ins_cost(140); - - size(4); -#ifdef AARCH64 - format %{ "MOV$cmp $dst,ZR\t! ptr" %} -#else - format %{ "MOV$cmp $dst,$src\t! ptr" %} -#endif - ins_encode %{ -#ifdef AARCH64 - __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); -#else - __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); -#endif - %} - ins_pipe(ialu_imm); -%} - -#ifdef AARCH64 -// Conditional move -instruct cmovF_reg(cmpOp cmp, flagsReg icc, regF dst, regF src1, regF src2) %{ - match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} - ins_encode %{ - __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(int_conditional_float_move); -%} - -instruct cmovD_reg(cmpOp cmp, flagsReg icc, regD dst, regD src1, regD src2) %{ - match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} - ins_encode %{ - __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(int_conditional_float_move); -%} - -instruct cmovFP_reg(cmpOpP cmp, flagsRegP icc, regF dst, regF src1, regF src2) %{ - match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} - ins_encode %{ - __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(int_conditional_float_move); -%} - -instruct cmovDP_reg(cmpOpP cmp, flagsRegP icc, regD dst, regD src1, regD src2) %{ - match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} - ins_encode %{ - __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(int_conditional_float_move); -%} - -instruct cmovFU_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src1, regF src2) %{ - match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); - size(4); - format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} - ins_encode %{ - __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); - %} - ins_pipe(int_conditional_float_move); -%} + match(Set 
dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); -instruct cmovDU_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src1, regD src2) %{ - match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); size(4); - format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + format %{ "MOV$cmp $dst,$src\t! ptr" %} ins_encode %{ - __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); %} - ins_pipe(int_conditional_float_move); + ins_pipe(ialu_imm); %} -instruct cmovFZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src1, regF src2) %{ - match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); +instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + size(4); - format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} + format %{ "MOV$cmp $dst,$src\t! ptr" %} ins_encode %{ - __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); %} - ins_pipe(int_conditional_float_move); + ins_pipe(ialu_imm); %} -instruct cmovDZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src1, regD src2) %{ - match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); - ins_cost(150); +instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); - format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + format %{ "MOV$cmp $dst,$src\t! ptr" %} ins_encode %{ - __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); %} - ins_pipe(int_conditional_float_move); + ins_pipe(ialu_imm); %} -#else // !AARCH64 // Conditional move instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{ @@ -6858,7 +5233,6 @@ %} ins_pipe(ialu_reg); %} -#endif // !AARCH64 //----------OS and Locking Instructions---------------------------------------- @@ -6915,7 +5289,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AddI (LShiftI src1 src2) src3)); @@ -6926,22 +5299,7 @@ %} ins_pipe(ialu_reg_reg); %} -#endif - -#ifdef AARCH64 -#ifdef TODO -instruct addshlL_reg_imm_reg(iRegL dst, iRegL src1, immU6 src2, iRegL src3) %{ - match(Set dst (AddL (LShiftL src1 src2) src3)); - size(4); - format %{ "ADD $dst,$src3,$src1<<$src2\t! 
long" %} - ins_encode %{ - __ add($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant)); - %} - ins_pipe(ialu_reg_reg); -%} -#endif -#endif instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ match(Set dst (AddI (LShiftI src1 src2) src3)); @@ -6954,7 +5312,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AddI (RShiftI src1 src2) src3)); @@ -6965,7 +5322,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ match(Set dst (AddI (RShiftI src1 src2) src3)); @@ -6978,7 +5334,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AddI (URShiftI src1 src2) src3)); @@ -6989,7 +5344,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ match(Set dst (AddI (URShiftI src1 src2) src3)); @@ -7026,69 +5380,6 @@ ins_pipe(ialu_reg_reg); %} -#ifdef AARCH64 -// unshifted I2L operand -operand unshiftedI2L(iRegI src2) %{ -//constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(ConvI2L src2); - - op_cost(1); - format %{ "$src2.w" %} - interface(MEMORY_INTER) %{ - base($src2); - index(0xff); - scale(0x0); - disp(0x0); - %} -%} - -// shifted I2L operand -operand shiftedI2L(iRegI src2, immI_0_4 src3) %{ -//constraint(ALLOC_IN_RC(sp_ptr_reg)); - match(LShiftX (ConvI2L src2) src3); - - op_cost(1); - format %{ "$src2.w << $src3" %} - interface(MEMORY_INTER) %{ - base($src2); - index(0xff); - scale($src3); - disp(0x0); - %} -%} - -opclass shiftedRegI(shiftedI2L, unshiftedI2L); - -instruct shlL_reg_regI(iRegL dst, iRegI src1, immU6 src2) %{ - match(Set dst (LShiftL (ConvI2L src1) src2)); - - size(4); - format %{ "LSL $dst,$src1.w,$src2\t! ptr" %} - ins_encode %{ - int c = $src2$$constant; - int r = 64 - c; - int s = 31; - if (s >= r) { - s = r - 1; - } - __ sbfm($dst$$Register, $src1$$Register, r, s); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addP_reg_regI(iRegP dst, iRegP src1, shiftedRegI src2) %{ - match(Set dst (AddP src1 src2)); - - ins_cost(DEFAULT_COST * 3/2); - size(4); - format %{ "ADD $dst,$src1,$src2, sxtw\t! ptr" %} - ins_encode %{ - Register base = reg_to_register_object($src2$$base); - __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale); - %} - ins_pipe(ialu_reg_reg); -%} -#endif // shifted iRegX operand operand shiftedX(iRegX src2, shimmX src3) %{ @@ -7131,30 +5422,6 @@ %} // Long Addition -#ifdef AARCH64 -instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (AddL src1 src2)); - size(4); - format %{ "ADD $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ add($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addL_reg_regI(iRegL dst, iRegL src1, shiftedRegI src2) %{ - match(Set dst (AddL src1 src2)); - - ins_cost(DEFAULT_COST * 3/2); - size(4); - format %{ "ADD $dst,$src1,$src2, sxtw\t! 
long" %} - ins_encode %{ - Register base = reg_to_register_object($src2$$base); - __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale); - %} - ins_pipe(ialu_reg_reg); -%} -#else instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{ match(Set dst (AddL src1 src2)); effect(KILL ccr); @@ -7167,36 +5434,9 @@ %} ins_pipe(ialu_reg_reg); %} -#endif - -#ifdef AARCH64 -// Immediate Addition -instruct addL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{ - match(Set dst (AddL src1 src2)); - - size(4); - format %{ "ADD $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ add($dst$$Register, $src1$$Register, $src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} - -instruct addL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{ - match(Set dst (SubL src1 src2)); - size(4); - format %{ "ADD $dst,$src1,-($src2)\t! long" %} - ins_encode %{ - __ add($dst$$Register, $src1$$Register, -$src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // TODO -#endif -#ifndef AARCH64 // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{ @@ -7211,26 +5451,19 @@ %} ins_pipe(ialu_reg_imm); %} -#endif //----------Conditional_store-------------------------------------------------- // Conditional-store of the updated heap-top. // Used during allocation of the shared heap. // Sets flags (EQ) on success. -// TODO: optimize out barriers with AArch64 load-acquire/store-release // LoadP-locked. instruct loadPLocked(iRegP dst, memoryex mem) %{ match(Set dst (LoadPLocked mem)); size(4); format %{ "LDREX $dst,$mem" %} ins_encode %{ -#ifdef AARCH64 - Register base = reg_to_register_object($mem$$base); - __ ldxr($dst$$Register, base); -#else __ ldrex($dst$$Register,$mem$$Address); -#endif %} ins_pipe(iload_mem); %} @@ -7243,12 +5476,7 @@ format %{ "STREX $tmp,$newval,$heap_top_ptr\n\t" "CMP $tmp, 0" %} ins_encode %{ -#ifdef AARCH64 - Register base = reg_to_register_object($heap_top_ptr$$base); - __ stxr($tmp$$Register, $newval$$Register, base); -#else __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address); -#endif __ cmp($tmp$$Register, 0); %} ins_pipe( long_memory_op ); @@ -7256,20 +5484,6 @@ // Conditional-store of an intx value. instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{ -#ifdef AARCH64 - match(Set icc (StoreLConditional mem (Binary oldval newval))); - effect( TEMP tmp ); - size(28); - format %{ "loop:\n\t" - "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t" - "SUBS $tmp, $tmp, $oldval\n\t" - "B.ne done\n\t" - "STXR $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop\n\t" - "CMP $tmp, 0\n\t" - "done:\n\t" - "membar LoadStore|LoadLoad" %} -#else match(Set icc (StoreIConditional mem (Binary oldval newval))); effect( TEMP tmp ); size(28); @@ -7281,29 +5495,15 @@ "B.eq loop \n\t" "TEQ $tmp, 0\n\t" "membar LoadStore|LoadLoad" %} -#endif ins_encode %{ Label loop; __ bind(loop); -#ifdef AARCH64 -// FIXME: use load-acquire/store-release, remove membar? 
- Label done; - Register base = reg_to_register_object($mem$$base); - __ ldxr($tmp$$Register, base); - __ subs($tmp$$Register, $tmp$$Register, $oldval$$Register); - __ b(done, ne); - __ stxr($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - __ cmp($tmp$$Register, 0); - __ bind(done); -#else __ ldrex($tmp$$Register, $mem$$Address); __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register); __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq); __ cmp($tmp$$Register, 1, eq); __ b(loop, eq); __ teq($tmp$$Register, 0); -#endif // used by biased locking only. Requires a membar. __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad), noreg); %} @@ -7312,118 +5512,6 @@ // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them -#ifdef AARCH64 -// TODO: if combined with membar, elide membar and use -// load-acquire/store-release if appropriate -instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegL newval, iRegI res, iRegI tmp, flagsReg ccr) %{ - match(Set res (CompareAndSwapL mem (Binary oldval newval))); - effect( KILL ccr, TEMP tmp); - size(24); - format %{ "loop:\n\t" - "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" - "CMP $tmp, $oldval\n\t" - "B.ne done\n\t" - "STXR $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop\n\t" - "done:\n\t" - "CSET_w $res, eq" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - Label loop, done; - __ bind(loop); - __ ldxr($tmp$$Register, base); - __ cmp($tmp$$Register, $oldval$$Register); - __ b(done, ne); - __ stxr($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - __ bind(done); - __ cset_w($res$$Register, eq); - %} - ins_pipe( long_memory_op ); -%} - -instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{ - match(Set res (CompareAndSwapI mem (Binary oldval newval))); - effect( KILL ccr, TEMP tmp); - size(24); - format %{ "loop:\n\t" - "LDXR_w $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" - "CMP_w $tmp, $oldval\n\t" - "B.ne done\n\t" - "STXR_w $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop\n\t" - "done:\n\t" - "CSET_w $res, eq" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - Label loop, done; - __ bind(loop); - __ ldxr_w($tmp$$Register, base); - __ cmp_w($tmp$$Register, $oldval$$Register); - __ b(done, ne); - __ stxr_w($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - __ bind(done); - __ cset_w($res$$Register, eq); - %} - ins_pipe( long_memory_op ); -%} - -// tmp must use iRegI instead of iRegN until 8051805 is fixed. -instruct compareAndSwapN_bool(memoryex mem, iRegN oldval, iRegN newval, iRegI res, iRegI tmp, flagsReg ccr) %{ - match(Set res (CompareAndSwapN mem (Binary oldval newval))); - effect( KILL ccr, TEMP tmp); - size(24); - format %{ "loop:\n\t" - "LDXR_w $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" - "CMP_w $tmp, $oldval\n\t" - "B.ne done\n\t" - "STXR_w $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop\n\t" - "done:\n\t" - "CSET_w $res, eq" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - Label loop, done; - __ bind(loop); - __ ldxr_w($tmp$$Register, base); - __ cmp_w($tmp$$Register, $oldval$$Register); - __ b(done, ne); - __ stxr_w($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - __ bind(done); - __ cset_w($res$$Register, eq); - %} - ins_pipe( long_memory_op ); -%} - -instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr) %{ - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - effect( KILL ccr, TEMP tmp); - size(24); - format %{ "loop:\n\t" - "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" - "CMP $tmp, $oldval\n\t" - "B.ne done\n\t" - "STXR $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop\n\t" - "done:\n\t" - "CSET_w $res, eq" %} - ins_encode %{ - Register base = reg_to_register_object($mem$$base); - Label loop, done; - __ bind(loop); - __ ldxr($tmp$$Register, base); - __ cmp($tmp$$Register, $oldval$$Register); - __ b(done, ne); - __ stxr($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - __ bind(done); - __ cset_w($res$$Register, eq); - %} - ins_pipe( long_memory_op ); -%} -#else // !AARCH64 instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{ match(Set res (CompareAndSwapL mem (Binary oldval newval))); effect( KILL ccr, TEMP tmp); @@ -7506,32 +5594,7 @@ %} ins_pipe( long_memory_op ); %} -#endif // !AARCH64 - -#ifdef AARCH64 -instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddI mem add)); - effect(TEMP tmp1, TEMP tmp2); - size(16); - format %{ "loop:\n\t" - "LDXR_w $tmp1, $mem\n\t" - "ADD_w $tmp1, $tmp1, $add\n\t" - "STXR_w $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($tmp1$$Register, base); - __ add_w($tmp1$$Register, $tmp1$$Register, $add$$constant); - __ stxr_w($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddI mem add)); @@ -7555,32 +5618,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddI mem add)); - effect(TEMP tmp1, TEMP tmp2); - size(16); - format %{ "loop:\n\t" - "LDXR_w $tmp1, $mem\n\t" - "ADD_w $tmp1, $tmp1, $add\n\t" - "STXR_w $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($tmp1$$Register, base); - __ add_w($tmp1$$Register, $tmp1$$Register, $add$$Register); - __ stxr_w($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ 
predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddI mem add)); @@ -7604,31 +5642,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2) %{ - match(Set res (GetAndAddI mem add)); - effect(TEMP tmp1, TEMP tmp2, TEMP res); - size(16); - format %{ "loop:\n\t" - "LDXR_w $res, $mem\n\t" - "ADD_w $tmp1, $res, $add\n\t" - "STXR_w $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($res$$Register, base); - __ add_w($tmp1$$Register, $res$$Register, $add$$constant); - __ stxr_w($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ match(Set res (GetAndAddI mem add)); effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); @@ -7651,31 +5665,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2) %{ - match(Set res (GetAndAddI mem add)); - effect(TEMP tmp1, TEMP tmp2, TEMP res); - size(16); - format %{ "loop:\n\t" - "LDXR_w $res, $mem\n\t" - "ADD_w $tmp1, $res, $add\n\t" - "STXR_w $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($res$$Register, base); - __ add_w($tmp1$$Register, $res$$Register, $add$$Register); - __ stxr_w($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ match(Set res (GetAndAddI mem add)); effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); @@ -7690,40 +5680,15 @@ ins_encode %{ Label loop; __ bind(loop); - __ ldrex($res$$Register,$mem$$Address); - __ add($tmp1$$Register, $res$$Register, $add$$Register); - __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); - __ cmp($tmp2$$Register, 0); - __ b(loop, ne); - %} - ins_pipe( long_memory_op ); -%} -#endif - -#ifdef AARCH64 -instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddL mem add)); - effect(TEMP tmp1, TEMP tmp2); - size(16); - format %{ "loop:\n\t" - "LDXR $tmp1, $mem\n\t" - "ADD $tmp1, $tmp1, $add\n\t" - "STXR $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr($tmp1$$Register, base); - __ add($tmp1$$Register, $tmp1$$Register, $add$$Register); - __ stxr($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); + __ ldrex($res$$Register,$mem$$Address); + __ add($tmp1$$Register, $res$$Register, $add$$Register); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); %} ins_pipe( long_memory_op ); %} -#else + instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddL mem add)); @@ -7749,32 +5714,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xaddL_imm_no_res(memoryex mem, aimmL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{ - 
predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddL mem add)); - effect(TEMP tmp1, TEMP tmp2); - size(16); - format %{ "loop:\n\t" - "LDXR $tmp1, $mem\n\t" - "ADD $tmp1, $tmp1, $add\n\t" - "STXR $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr($tmp1$$Register, base); - __ add($tmp1$$Register, $tmp1$$Register, $add$$constant); - __ stxr($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ @@ -7802,31 +5742,7 @@ %} ins_pipe( long_memory_op ); %} -#endif -#ifdef AARCH64 -instruct xaddL_reg(memoryex mem, iRegL add, iRegL res, iRegL tmp1, iRegI tmp2) %{ - match(Set res (GetAndAddL mem add)); - effect(TEMP tmp1, TEMP tmp2, TEMP res); - size(16); - format %{ "loop:\n\t" - "LDXR $res, $mem\n\t" - "ADD $tmp1, $res, $add\n\t" - "STXR $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr($res$$Register, base); - __ add($tmp1$$Register, $res$$Register, $add$$Register); - __ stxr($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ match(Set res (GetAndAddL mem add)); effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); @@ -7851,31 +5767,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xaddL_imm(memoryex mem, aimmL add, iRegL res, iRegL tmp1, iRegI tmp2) %{ - match(Set res (GetAndAddL mem add)); - effect(TEMP tmp1, TEMP tmp2, TEMP res); - size(16); - format %{ "loop:\n\t" - "LDXR $res, $mem\n\t" - "ADD $tmp1, $res, $add\n\t" - "STXR $tmp2, $tmp1, $mem\n\t" - "CBNZ_w $tmp2, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr($res$$Register, base); - __ add($tmp1$$Register, $res$$Register, $add$$constant); - __ stxr($tmp2$$Register, $tmp1$$Register, base); - __ cbnz_w($tmp2$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ @@ -7902,52 +5794,7 @@ %} ins_pipe( long_memory_op ); %} -#endif - -#ifdef AARCH64 -instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp) %{ - match(Set res (GetAndSetI mem newval)); - effect(TEMP tmp, TEMP res); - size(12); - format %{ "loop:\n\t" - "LDXR_w $res, $mem\n\t" - "STXR_w $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop" %} - - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($res$$Register, base); - __ stxr_w($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} - -#ifdef XXX -// Disabled until 8051805 is fixed. 
-instruct xchgN(memoryex mem, iRegN newval, iRegN res, iRegN tmp) %{ - match(Set res (GetAndSetN mem newval)); - effect(TEMP tmp, TEMP res); - size(12); - format %{ "loop:\n\t" - "LDXR_w $res, $mem\n\t" - "STXR_w $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr_w($res$$Register, base); - __ stxr_w($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#endif -#else instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{ match(Set res (GetAndSetI mem newval)); effect(KILL ccr, TEMP tmp, TEMP res); @@ -7968,29 +5815,7 @@ %} ins_pipe( long_memory_op ); %} -#endif -#ifdef AARCH64 -instruct xchgL(memoryex mem, iRegL newval, iRegL res, iRegI tmp) %{ - match(Set res (GetAndSetL mem newval)); - effect(TEMP tmp, TEMP res); - size(12); - format %{ "loop:\n\t" - "LDXR $res, $mem\n\t" - "STXR $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop" %} - - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldxr($res$$Register, base); - __ stxr($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{ match(Set res (GetAndSetL mem newval)); effect( KILL ccr, TEMP tmp, TEMP res); @@ -8011,29 +5836,7 @@ %} ins_pipe( long_memory_op ); %} -#endif // !AARCH64 - -#ifdef AARCH64 -instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp) %{ - match(Set res (GetAndSetP mem newval)); - effect(TEMP tmp, TEMP res); - size(12); - format %{ "loop:\n\t" - "LDREX $res, $mem\n\t" - "STREX $tmp, $newval, $mem\n\t" - "CBNZ_w $tmp, loop" %} - ins_encode %{ - Label loop; - Register base = reg_to_register_object($mem$$base); - __ bind(loop); - __ ldrex($res$$Register, base); - __ strex($tmp$$Register, $newval$$Register, base); - __ cbnz_w($tmp$$Register, loop); - %} - ins_pipe( long_memory_op ); -%} -#else instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{ match(Set res (GetAndSetP mem newval)); effect(KILL ccr, TEMP tmp, TEMP res); @@ -8054,7 +5857,6 @@ %} ins_pipe( long_memory_op ); %} -#endif // !AARCH64 //--------------------- // Subtraction Instructions @@ -8070,7 +5872,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (SubI src1 (LShiftI src2 src3))); @@ -8081,7 +5882,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (SubI src1 (LShiftI src2 src3))); @@ -8094,7 +5894,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (SubI src1 (RShiftI src2 src3))); @@ -8105,7 +5904,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (SubI src1 (RShiftI src2 src3))); @@ -8118,7 +5916,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (SubI src1 (URShiftI src2 src3))); @@ -8129,7 +5926,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (SubI src1 (URShiftI src2 src3))); @@ -8142,7 +5938,6 @@ 
ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (SubI (LShiftI src1 src2) src3)); @@ -8208,7 +6003,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif // Immediate Subtraction instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{ @@ -8233,7 +6027,6 @@ ins_pipe(ialu_reg_imm); %} -#ifndef AARCH64 instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{ match(Set dst (SubI src1 src2)); @@ -8244,21 +6037,8 @@ %} ins_pipe(ialu_zero_reg); %} -#endif // Register Subtraction -#ifdef AARCH64 -instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (SubL src1 src2)); - - size(4); - format %{ "SUB $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ sub($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} -#else instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{ match(Set dst (SubL src1 src2)); effect (KILL icc); @@ -8272,36 +6052,9 @@ %} ins_pipe(ialu_reg_reg); %} -#endif - -#ifdef AARCH64 -// Immediate Subtraction -instruct subL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{ - match(Set dst (SubL src1 src2)); - - size(4); - format %{ "SUB $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ sub($dst$$Register, $src1$$Register, $src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} - -instruct subL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{ - match(Set dst (AddL src1 src2)); - size(4); - format %{ "SUB $dst,$src1,-($src2)\t! long" %} - ins_encode %{ - __ sub($dst$$Register, $src1$$Register, -$src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // TODO -#endif -#ifndef AARCH64 // Immediate Subtraction // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes @@ -8333,7 +6086,6 @@ %} ins_pipe(ialu_zero_reg); %} -#endif // !AARCH64 // Multiplication Instructions // Integer Multiplication @@ -8349,17 +6101,6 @@ ins_pipe(imul_reg_reg); %} -#ifdef AARCH64 -instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (MulL src1 src2)); - size(4); - format %{ "MUL $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ mul($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(imul_reg_reg); -%} -#else instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{ effect(DEF dst, USE src1, USE src2); size(4); @@ -8401,22 +6142,9 @@ mulL_lo1_lo2(dst, src1, src2); %} %} -#endif // !AARCH64 // Integer Division // Register Division -#ifdef AARCH64 -instruct divI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ - match(Set dst (DivI src1 src2)); - - size(4); - format %{ "SDIV $dst,$src1,$src2\t! 
32-bit" %} - ins_encode %{ - __ sdiv_w($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -%} -#else instruct divI_reg_reg(R1RegI dst, R0RegI src1, R2RegI src2, LRRegP lr, flagsReg ccr) %{ match(Set dst (DivI src1 src2)); effect( KILL ccr, KILL src1, KILL src2, KILL lr); @@ -8428,21 +6156,8 @@ %} ins_pipe(sdiv_reg_reg); %} -#endif // Register Long Division -#ifdef AARCH64 -instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ - match(Set dst (DivL src1 src2)); - - size(4); - format %{ "SDIV $dst,$src1,$src2" %} - ins_encode %{ - __ sdiv($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -%} -#else instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ match(Set dst (DivL src1 src2)); effect(CALL); @@ -8454,38 +6169,9 @@ %} ins_pipe(divL_reg_reg); %} -#endif // Integer Remainder // Register Remainder -#ifdef AARCH64 -#ifdef TODO -instruct msubI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ - match(Set dst (SubI src1 (MulI src2 src3))); - - size(4); - format %{ "MSUB $dst,$src2,$src3,$src1\t! 32-bit\n\t" %} - ins_encode %{ - __ msub_w($dst$$Register, $src2$$Register, $src3$$Register, $src1$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -%} -#endif - -instruct modI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{ - match(Set dst (ModI src1 src2)); - effect(TEMP temp); - - size(8); - format %{ "SDIV $temp,$src1,$src2\t! 32-bit\n\t" - "MSUB $dst,$src2,$temp,$src1\t! 32-bit\n\t" %} - ins_encode %{ - __ sdiv_w($temp$$Register, $src1$$Register, $src2$$Register); - __ msub_w($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -%} -#else instruct modI_reg_reg(R0RegI dst, R0RegI src1, R2RegI src2, R1RegI temp, LRRegP lr, flagsReg ccr ) %{ match(Set dst (ModI src1 src2)); effect( KILL ccr, KILL temp, KILL src2, KILL lr); @@ -8496,24 +6182,8 @@ %} ins_pipe(sdiv_reg_reg); %} -#endif // Register Long Remainder -#ifdef AARCH64 -instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2, iRegL temp) %{ - match(Set dst (ModL src1 src2)); - effect(TEMP temp); - - size(8); - format %{ "SDIV $temp,$src1,$src2\n\t" - "MSUB $dst,$src2,$temp,$src1" %} - ins_encode %{ - __ sdiv($temp$$Register, $src1$$Register, $src2$$Register); - __ msub($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -%} -#else instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ match(Set dst (ModL src1 src2)); effect(CALL); @@ -8525,7 +6195,6 @@ %} ins_pipe(divL_reg_reg); %} -#endif // Integer Shift Instructions @@ -8534,17 +6203,10 @@ match(Set dst (LShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "LSLV $dst,$src1,$src2\t! int" %} - ins_encode %{ - __ lslv_w($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else format %{ "LSL $dst,$src1,$src2 \n\t" %} ins_encode %{ __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register)); %} -#endif ins_pipe(ialu_reg_reg); %} @@ -8553,21 +6215,13 @@ match(Set dst (LShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "LSL_w $dst,$src1,$src2\t! int" %} - ins_encode %{ - __ _lsl($dst$$Register, $src1$$Register, $src2$$constant); - %} -#else format %{ "LSL $dst,$src1,$src2\t! 
int" %} ins_encode %{ __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant); %} -#endif ins_pipe(ialu_reg_imm); %} -#ifndef AARCH64 instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ effect(USE_DEF dst, USE src1, USE src2); size(4); @@ -8605,40 +6259,18 @@ %} ins_pipe(ialu_reg_reg); %} -#endif // !AARCH64 instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ match(Set dst (LShiftL src1 src2)); -#ifdef AARCH64 - size(4); - format %{ "LSLV $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ lslv($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); -#else expand %{ flagsReg ccr; shlL_reg_reg_overlap(dst, src1, src2, ccr); shlL_reg_reg_merge_hi(dst, src1, src2); shlL_reg_reg_merge_lo(dst, src1, src2); %} -#endif %} -#ifdef AARCH64 -instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ - match(Set dst (LShiftL src1 src2)); - - size(4); - format %{ "LSL $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // Register Shift Left Immediate instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ match(Set dst (LShiftL src1 src2)); @@ -8673,23 +6305,15 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // !AARCH64 // Register Arithmetic Shift Right instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ match(Set dst (RShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "ASRV $dst,$src1,$src2\t! int" %} - ins_encode %{ - __ asrv_w($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else format %{ "ASR $dst,$src1,$src2\t! int" %} ins_encode %{ __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$Register)); %} -#endif ins_pipe(ialu_reg_reg); %} @@ -8698,21 +6322,13 @@ match(Set dst (RShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "ASR_w $dst,$src1,$src2" %} - ins_encode %{ - __ _asr_w($dst$$Register, $src1$$Register, $src2$$constant); - %} -#else format %{ "ASR $dst,$src1,$src2" %} ins_encode %{ __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$constant)); %} -#endif ins_pipe(ialu_reg_imm); %} -#ifndef AARCH64 // Register Shift Right Arithmetic Long instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ effect(USE_DEF dst, USE src1, USE src2); @@ -8751,41 +6367,19 @@ %} ins_pipe(ialu_reg_reg); %} -#endif // !AARCH64 instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ match(Set dst (RShiftL src1 src2)); -#ifdef AARCH64 - size(4); - format %{ "ASRV $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ asrv($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); -#else expand %{ flagsReg ccr; sarL_reg_reg_overlap(dst, src1, src2, ccr); sarL_reg_reg_merge_lo(dst, src1, src2); sarL_reg_reg_merge_hi(dst, src1, src2); %} -#endif %} // Register Shift Left Immediate -#ifdef AARCH64 -instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ - match(Set dst (RShiftL src1 src2)); - - size(4); - format %{ "ASR $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ _asr($dst$$Register, $src1$$Register, $src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ match(Set dst (RShiftL src1 src2)); @@ -8819,23 +6413,15 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // Register Shift Right instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ match(Set dst (URShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "LSRV $dst,$src1,$src2\t! 
int" %} - ins_encode %{ - __ lsrv_w($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else format %{ "LSR $dst,$src1,$src2\t! int" %} ins_encode %{ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); %} -#endif ins_pipe(ialu_reg_reg); %} @@ -8844,21 +6430,13 @@ match(Set dst (URShiftI src1 src2)); size(4); -#ifdef AARCH64 - format %{ "LSR_w $dst,$src1,$src2" %} - ins_encode %{ - __ _lsr_w($dst$$Register, $src1$$Register, $src2$$constant); - %} -#else format %{ "LSR $dst,$src1,$src2" %} ins_encode %{ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); %} -#endif ins_pipe(ialu_reg_imm); %} -#ifndef AARCH64 // Register Shift Right instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ effect(USE_DEF dst, USE src1, USE src2); @@ -8897,41 +6475,19 @@ %} ins_pipe(ialu_reg_reg); %} -#endif // !AARCH64 instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ match(Set dst (URShiftL src1 src2)); -#ifdef AARCH64 - size(4); - format %{ "LSRV $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ lsrv($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg_reg); -#else expand %{ flagsReg ccr; shrL_reg_reg_overlap(dst, src1, src2, ccr); shrL_reg_reg_merge_lo(dst, src1, src2); shrL_reg_reg_merge_hi(dst, src1, src2); %} -#endif -%} - -// Register Shift Right Immediate -#ifdef AARCH64 -instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ - match(Set dst (URShiftL src1 src2)); - - size(4); - format %{ "LSR $dst,$src1,$src2" %} - ins_encode %{ - __ _lsr($dst$$Register, $src1$$Register, $src2$$constant); - %} - ins_pipe(ialu_reg_imm); %} -#else + +// Register Shift Right Immediate instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ match(Set dst (URShiftL src1 src2)); @@ -8966,7 +6522,6 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // !AARCH64 instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{ @@ -9164,7 +6719,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AndI src1 (LShiftI src2 src3))); @@ -9175,7 +6729,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (AndI src1 (LShiftI src2 src3))); @@ -9188,7 +6741,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AndI src1 (RShiftI src2 src3))); @@ -9199,7 +6751,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (AndI src1 (RShiftI src2 src3))); @@ -9212,7 +6763,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (AndI src1 (URShiftI src2 src3))); @@ -9223,7 +6773,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (AndI src1 (URShiftI src2 src3))); @@ -9248,7 +6797,6 @@ ins_pipe(ialu_reg_imm); %} -#ifndef AARCH64 instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{ match(Set dst (AndI src1 src2)); @@ -9259,43 +6807,21 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // Register And Long instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ match(Set dst (AndL src1 src2)); ins_cost(DEFAULT_COST); -#ifdef AARCH64 - size(4); - format %{ "AND $dst,$src1,$src2\t! 
long" %} - ins_encode %{ - __ andr($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else size(8); format %{ "AND $dst,$src1,$src2\t! long" %} ins_encode %{ __ andr($dst$$Register, $src1$$Register, $src2$$Register); __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); %} -#endif ins_pipe(ialu_reg_reg); %} -#ifdef AARCH64 -// Immediate And -instruct andL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{ - match(Set dst (AndL src1 src2)); - - size(4); - format %{ "AND $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ andr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ @@ -9309,7 +6835,6 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // Or Instructions // Register Or @@ -9324,7 +6849,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (OrI src1 (LShiftI src2 src3))); @@ -9335,7 +6859,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (OrI src1 (LShiftI src2 src3))); @@ -9348,7 +6871,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (OrI src1 (RShiftI src2 src3))); @@ -9359,7 +6881,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (OrI src1 (RShiftI src2 src3))); @@ -9372,7 +6893,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (OrI src1 (URShiftI src2 src3))); @@ -9383,7 +6903,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (OrI src1 (URShiftI src2 src3))); @@ -9414,13 +6933,6 @@ match(Set dst (OrL src1 src2)); ins_cost(DEFAULT_COST); -#ifdef AARCH64 - size(4); - format %{ "OR $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ orr($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else size(8); format %{ "OR $dst.lo,$src1.lo,$src2.lo\t! long\n\t" "OR $dst.hi,$src1.hi,$src2.hi" %} @@ -9428,22 +6940,9 @@ __ orr($dst$$Register, $src1$$Register, $src2$$Register); __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); %} -#endif ins_pipe(ialu_reg_reg); %} -#ifdef AARCH64 -instruct orL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{ - match(Set dst (OrL src1 src2)); - - size(4); - format %{ "ORR $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ orr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ @@ -9458,7 +6957,6 @@ %} ins_pipe(ialu_reg_imm); %} -#endif #ifdef TODO // Use SPRegP to match Rthread (TLS register) without spilling. 
@@ -9488,7 +6986,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (XorI src1 (LShiftI src2 src3))); @@ -9499,7 +6996,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (XorI src1 (LShiftI src2 src3))); @@ -9512,7 +7008,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (XorI src1 (RShiftI src2 src3))); @@ -9523,7 +7018,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (XorI src1 (RShiftI src2 src3))); @@ -9536,7 +7030,6 @@ ins_pipe(ialu_reg_reg); %} -#ifndef AARCH64 instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ match(Set dst (XorI src1 (URShiftI src2 src3))); @@ -9547,7 +7040,6 @@ %} ins_pipe(ialu_reg_reg); %} -#endif instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ match(Set dst (XorI src1 (URShiftI src2 src3))); @@ -9576,13 +7068,6 @@ instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ match(Set dst (XorL src1 src2)); ins_cost(DEFAULT_COST); -#ifdef AARCH64 - size(4); - format %{ "XOR $dst,$src1,$src2\t! long" %} - ins_encode %{ - __ eor($dst$$Register, $src1$$Register, $src2$$Register); - %} -#else size(8); format %{ "XOR $dst.hi,$src1.hi,$src2.hi\t! long\n\t" "XOR $dst.lo,$src1.lo,$src2.lo\t! long" %} @@ -9590,22 +7075,9 @@ __ eor($dst$$Register, $src1$$Register, $src2$$Register); __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); %} -#endif ins_pipe(ialu_reg_reg); %} -#ifdef AARCH64 -instruct xorL_reg_limmL(iRegL dst, iRegL src1, limmL con) %{ - match(Set dst (XorL src1 con)); - ins_cost(DEFAULT_COST); - size(4); - format %{ "EOR $dst,$src1,$con\t! 
long" %} - ins_encode %{ - __ eor($dst$$Register, $src1$$Register, (uintx)$con$$constant); - %} - ins_pipe(ialu_reg_imm); -%} -#else // TODO: try immLRot2 instead, (0, $con$$constant) becomes // (hi($con$$constant), lo($con$$constant)) becomes instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ @@ -9620,22 +7092,11 @@ %} ins_pipe(ialu_reg_imm); %} -#endif // AARCH64 //----------Convert to Boolean------------------------------------------------- instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{ match(Set dst (Conv2B src)); effect(KILL ccr); -#ifdef AARCH64 - size(8); - ins_cost(DEFAULT_COST*2); - format %{ "cmp_32 $src,ZR\n\t" - "cset_w $dst, ne" %} - ins_encode %{ - __ cmp_32($src$$Register, ZR); - __ cset_w($dst$$Register, ne); - %} -#else size(12); ins_cost(DEFAULT_COST*2); format %{ "TST $src,$src \n\t" @@ -9646,23 +7107,12 @@ __ mov($dst$$Register, 0); __ mov($dst$$Register, 1, ne); %} -#endif ins_pipe(ialu_reg_ialu); %} instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{ match(Set dst (Conv2B src)); effect(KILL ccr); -#ifdef AARCH64 - size(8); - ins_cost(DEFAULT_COST*2); - format %{ "CMP $src,ZR\n\t" - "cset $dst, ne" %} - ins_encode %{ - __ cmp($src$$Register, ZR); - __ cset($dst$$Register, ne); - %} -#else size(12); ins_cost(DEFAULT_COST*2); format %{ "TST $src,$src \n\t" @@ -9673,23 +7123,12 @@ __ mov($dst$$Register, 0); __ mov($dst$$Register, 1, ne); %} -#endif ins_pipe(ialu_reg_ialu); %} instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{ match(Set dst (CmpLTMask p q)); effect( KILL ccr ); -#ifdef AARCH64 - size(8); - ins_cost(DEFAULT_COST*2); - format %{ "CMP_w $p,$q\n\t" - "CSETM_w $dst, lt" %} - ins_encode %{ - __ cmp_w($p$$Register, $q$$Register); - __ csetm_w($dst$$Register, lt); - %} -#else ins_cost(DEFAULT_COST*3); format %{ "CMP $p,$q\n\t" "MOV $dst, #0\n\t" @@ -9699,23 +7138,12 @@ __ mov($dst$$Register, 0); __ mvn($dst$$Register, 0, lt); %} -#endif ins_pipe(ialu_reg_reg_ialu); %} instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{ match(Set dst (CmpLTMask p q)); effect( KILL ccr ); -#ifdef AARCH64 - size(8); - ins_cost(DEFAULT_COST*2); - format %{ "CMP_w $p,$q\n\t" - "CSETM_w $dst, lt" %} - ins_encode %{ - __ cmp_w($p$$Register, $q$$constant); - __ csetm_w($dst$$Register, lt); - %} -#else ins_cost(DEFAULT_COST*3); format %{ "CMP $p,$q\n\t" "MOV $dst, #0\n\t" @@ -9725,27 +7153,9 @@ __ mov($dst$$Register, 0); __ mvn($dst$$Register, 0, lt); %} -#endif ins_pipe(ialu_reg_reg_ialu); %} -#ifdef AARCH64 -instruct cadd_cmpLTMask3( iRegI dst, iRegI p, iRegI q, iRegI y, iRegI x, flagsReg ccr ) %{ - match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); - effect( TEMP dst, KILL ccr ); - size(12); - ins_cost(DEFAULT_COST*3); - format %{ "CMP_w $p,$q\n\t" - "ADD_w $dst,$y,$x\n\t" - "CSEL_w $dst,$dst,$x,lt" %} - ins_encode %{ - __ cmp_w($p$$Register, $q$$Register); - __ add_w($dst$$Register, $y$$Register, $x$$Register); - __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); - %} - ins_pipe( cadd_cmpltmask ); -%} -#else instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{ match(Set z (AddI (AndI (CmpLTMask p q) y) z)); effect( KILL ccr ); @@ -9758,25 +7168,7 @@ %} ins_pipe( cadd_cmpltmask ); %} -#endif -#ifdef AARCH64 -instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI x, flagsReg ccr ) %{ - match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); - effect( TEMP dst, KILL ccr ); - size(12); - ins_cost(DEFAULT_COST*3); - format %{ "CMP_w $p,$q\n\t" - "ADD_w $dst,$y,$x\n\t" 
- "CSEL_w $dst,$dst,$x,lt" %} - ins_encode %{ - __ cmp_w($p$$Register, $q$$constant); - __ add_w($dst$$Register, $y$$Register, $x$$Register); - __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); - %} - ins_pipe( cadd_cmpltmask ); -%} -#else // FIXME: remove unused "dst" instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{ match(Set z (AddI (AndI (CmpLTMask p q) y) z)); @@ -9790,25 +7182,7 @@ %} ins_pipe( cadd_cmpltmask ); %} -#endif // !AARCH64 -#ifdef AARCH64 -instruct cadd_cmpLTMask( iRegI dst, iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{ - match(Set dst (AddI (AndI (CmpLTMask p q) y) (SubI p q))); - effect( TEMP dst, KILL ccr ); - size(12); - ins_cost(DEFAULT_COST*3); - format %{ "SUBS_w $p,$p,$q\n\t" - "ADD_w $dst,$y,$p\n\t" - "CSEL_w $dst,$dst,$p,lt" %} - ins_encode %{ - __ subs_w($p$$Register, $p$$Register, $q$$Register); - __ add_w($dst$$Register, $y$$Register, $p$$Register); - __ csel_w($dst$$Register, $dst$$Register, $p$$Register, lt); - %} - ins_pipe( cadd_cmpltmask ); // FIXME -%} -#else instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{ match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); effect( KILL ccr ); @@ -9821,7 +7195,6 @@ %} ins_pipe( cadd_cmpltmask ); %} -#endif //----------Arithmetic Conversion Instructions--------------------------------- // The conversions operations are all Alpha sorted. Please keep it that way! @@ -9839,27 +7212,6 @@ // Convert a double to an int in a float register. // If the double is a NAN, stuff a zero in instead. -#ifdef AARCH64 -instruct convD2I_reg_reg(iRegI dst, regD src) %{ - match(Set dst (ConvD2I src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - format %{ "FCVTZS_wd $dst, $src" %} - ins_encode %{ - __ fcvtzs_wd($dst$$Register, $src$$FloatRegister); - %} - ins_pipe(fcvtD2I); -%} - -instruct convD2L_reg_reg(iRegL dst, regD src) %{ - match(Set dst (ConvD2L src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - format %{ "FCVTZS_xd $dst, $src" %} - ins_encode %{ - __ fcvtzs_xd($dst$$Register, $src$$FloatRegister); - %} - ins_pipe(fcvtD2L); -%} -#else instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{ match(Set dst (ConvD2I src)); effect( TEMP tmp ); @@ -9872,12 +7224,10 @@ %} ins_pipe(fcvtD2I); %} -#endif // Convert a double to a long in a double register. // If the double is a NAN, stuff a zero in instead. 
-#ifndef AARCH64 // Double to Long conversion instruct convD2L_reg(R0R1RegL dst, regD src) %{ match(Set dst (ConvD2L src)); @@ -9897,7 +7247,6 @@ %} ins_pipe(fcvtD2L); %} -#endif instruct convF2D_reg(regD dst, regF src) %{ match(Set dst (ConvF2D src)); @@ -9909,29 +7258,6 @@ ins_pipe(fcvtF2D); %} -#ifdef AARCH64 -instruct convF2I_reg_reg(iRegI dst, regF src) %{ - match(Set dst (ConvF2I src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - size(4); - format %{ "FCVTZS_ws $dst, $src" %} - ins_encode %{ - __ fcvtzs_ws($dst$$Register, $src$$FloatRegister); - %} - ins_pipe(fcvtF2I); -%} - -instruct convF2L_reg_reg(iRegL dst, regF src) %{ - match(Set dst (ConvF2L src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - size(4); - format %{ "FCVTZS_xs $dst, $src" %} - ins_encode %{ - __ fcvtzs_xs($dst$$Register, $src$$FloatRegister); - %} - ins_pipe(fcvtF2L); -%} -#else instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{ match(Set dst (ConvF2I src)); effect( TEMP tmp ); @@ -9965,20 +7291,7 @@ %} ins_pipe(fcvtF2L); %} -#endif -#ifdef AARCH64 -instruct convI2D_reg_reg(iRegI src, regD dst) %{ - match(Set dst (ConvI2D src)); - ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME - size(4); - format %{ "SCVTF_dw $dst,$src" %} - ins_encode %{ - __ scvtf_dw($dst$$FloatRegister, $src$$Register); - %} - ins_pipe(fcvtI2D); -%} -#else instruct convI2D_reg_reg(iRegI src, regD_low dst) %{ match(Set dst (ConvI2D src)); ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME @@ -9991,18 +7304,10 @@ %} ins_pipe(fcvtI2D); %} -#endif instruct convI2F_reg_reg( regF dst, iRegI src ) %{ match(Set dst (ConvI2F src)); ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME -#ifdef AARCH64 - size(4); - format %{ "SCVTF_sw $dst,$src" %} - ins_encode %{ - __ scvtf_sw($dst$$FloatRegister, $src$$Register); - %} -#else size(8); format %{ "FMSR $dst,$src \n\t" "FSITOS $dst, $dst"%} @@ -10010,19 +7315,11 @@ __ fmsr($dst$$FloatRegister, $src$$Register); __ fsitos($dst$$FloatRegister, $dst$$FloatRegister); %} -#endif ins_pipe(fcvtI2F); %} instruct convI2L_reg(iRegL dst, iRegI src) %{ match(Set dst (ConvI2L src)); -#ifdef AARCH64 - size(4); - format %{ "SXTW $dst,$src\t! int->long" %} - ins_encode %{ - __ sxtw($dst$$Register, $src$$Register); - %} -#else size(8); format %{ "MOV $dst.lo, $src \n\t" "ASR $dst.hi,$src,31\t! int->long" %} @@ -10030,20 +7327,12 @@ __ mov($dst$$Register, $src$$Register); __ mov($dst$$Register->successor(), AsmOperand($src$$Register, asr, 31)); %} -#endif ins_pipe(ialu_reg_reg); %} // Zero-extend convert int to long instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{ match(Set dst (AndL (ConvI2L src) mask) ); -#ifdef AARCH64 - size(4); - format %{ "mov_w $dst,$src\t! zero-extend int to long" %} - ins_encode %{ - __ mov_w($dst$$Register, $src$$Register); - %} -#else size(8); format %{ "MOV $dst.lo,$src.lo\t! zero-extend int to long\n\t" "MOV $dst.hi, 0"%} @@ -10051,20 +7340,12 @@ __ mov($dst$$Register, $src$$Register); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(ialu_reg_reg); %} // Zero-extend long instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{ match(Set dst (AndL src mask) ); -#ifdef AARCH64 - size(4); - format %{ "mov_w $dst,$src\t! zero-extend long" %} - ins_encode %{ - __ mov_w($dst$$Register, $src$$Register); - %} -#else size(8); format %{ "MOV $dst.lo,$src.lo\t! 
zero-extend long\n\t" "MOV $dst.hi, 0"%} @@ -10072,7 +7353,6 @@ __ mov($dst$$Register, $src$$Register); __ mov($dst$$Register->successor(), 0); %} -#endif ins_pipe(ialu_reg_reg); %} @@ -10107,17 +7387,10 @@ ins_cost(MEMORY_REF_COST); // FIXME size(4); -#ifdef AARCH64 - format %{ "FMOV_xd $dst,$src\t! MoveD2L" %} - ins_encode %{ - __ fmov_xd($dst$$Register, $src$$FloatRegister); - %} -#else format %{ "FMRRD $dst,$src\t! MoveD2L" %} ins_encode %{ __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister); %} -#endif ins_pipe(iload_mem); // FIXME %} @@ -10127,46 +7400,16 @@ ins_cost(MEMORY_REF_COST); // FIXME size(4); -#ifdef AARCH64 - format %{ "FMOV_dx $dst,$src\t! MoveL2D" %} - ins_encode %{ - __ fmov_dx($dst$$FloatRegister, $src$$Register); - %} -#else format %{ "FMDRR $dst,$src\t! MoveL2D" %} ins_encode %{ __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); %} -#endif ins_pipe(ialu_reg_reg); // FIXME %} //----------- // Long to Double conversion -#ifdef AARCH64 -instruct convL2D(regD dst, iRegL src) %{ - match(Set dst (ConvL2D src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - size(4); - format %{ "SCVTF_dx $dst, $src" %} - ins_encode %{ - __ scvtf_dx($dst$$FloatRegister, $src$$Register); - %} - ins_pipe(fcvtL2D); -%} - -instruct convL2F(regF dst, iRegL src) %{ - match(Set dst (ConvL2F src)); - ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME - size(4); - format %{ "SCVTF_sx $dst, $src" %} - ins_encode %{ - __ scvtf_sx($dst$$FloatRegister, $src$$Register); - %} - ins_pipe(fcvtL2F); -%} -#else // Magic constant, 0x43300000 instruct loadConI_x43300000(iRegI dst) %{ effect(DEF dst); @@ -10212,7 +7455,6 @@ ins_pipe(faddD_reg_reg); %} -#ifndef AARCH64 // Convert integer in high half of a double register (in the lower half of // the double register file) to double instruct convI2D_regDHi_regD(regD dst, regD_low src) %{ @@ -10224,7 +7466,6 @@ %} ins_pipe(fcvtLHi2D); %} -#endif // Add float double precision instruct addD_regD_regD(regD dst, regD src1, regD src2) %{ @@ -10315,26 +7556,17 @@ addD_regD_regD(dst, tmp3, tmp4); %} %} -#endif // !AARCH64 instruct convL2I_reg(iRegI dst, iRegL src) %{ match(Set dst (ConvL2I src)); size(4); -#ifdef AARCH64 - format %{ "MOV_w $dst,$src\t! long->int" %} - ins_encode %{ - __ mov_w($dst$$Register, $src$$Register); - %} -#else format %{ "MOV $dst,$src.lo\t! 
long->int" %} ins_encode %{ __ mov($dst$$Register, $src$$Register); %} -#endif ins_pipe(ialu_move_reg_I_to_L); %} -#ifndef AARCH64 // Register Shift Right Immediate instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{ match(Set dst (ConvL2I (RShiftL src cnt))); @@ -10349,7 +7581,6 @@ %} ins_pipe(ialu_reg_imm); %} -#endif //----------Control Flow Instructions------------------------------------------ @@ -10428,7 +7659,6 @@ ins_pipe(ialu_cconly_reg_reg_zero); %} -#ifndef AARCH64 instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); size(4); @@ -10439,7 +7669,6 @@ %} ins_pipe(ialu_cconly_reg_reg_zero); %} -#endif instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); @@ -10452,7 +7681,6 @@ ins_pipe(ialu_cconly_reg_reg_zero); %} -#ifndef AARCH64 instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); size(4); @@ -10463,7 +7691,6 @@ %} ins_pipe(ialu_cconly_reg_reg_zero); %} -#endif instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); @@ -10476,7 +7703,6 @@ ins_pipe(ialu_cconly_reg_reg_zero); %} -#ifndef AARCH64 instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); size(4); @@ -10487,7 +7713,6 @@ %} ins_pipe(ialu_cconly_reg_reg_zero); %} -#endif instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); @@ -10511,31 +7736,6 @@ ins_pipe(ialu_cconly_reg_imm_zero); %} -#ifdef AARCH64 -instruct compL_reg_reg(flagsReg xcc, iRegL op1, iRegL op2) -%{ - match(Set xcc (CmpL op1 op2)); - effect( DEF xcc, USE op1, USE op2 ); - - size(4); - format %{ "CMP $op1,$op2\t! long" %} - ins_encode %{ - __ cmp($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_cconly_reg_reg); -%} - -instruct compUL_iReg(flagsRegU xcc, iRegL op1, iRegL op2) %{ - match(Set xcc (CmpUL op1 op2)); - - size(4); - format %{ "CMP $op1,$op2\t! unsigned long" %} - ins_encode %{ - __ cmp($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_cconly_reg_reg); -%} -#else instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegL tmp) %{ match(Set xcc (CmpL op1 op2)); effect( DEF xcc, USE op1, USE op2, TEMP tmp ); @@ -10563,35 +7763,7 @@ %} ins_pipe(ialu_cconly_reg_reg); %} -#endif - -#ifdef AARCH64 -instruct compL_reg_con(flagsReg xcc, iRegL op1, aimmL con) %{ - match(Set xcc (CmpL op1 con)); - effect( DEF xcc, USE op1, USE con ); - - size(8); - format %{ "CMP $op1,$con\t\t! long" %} - ins_encode %{ - __ cmp($op1$$Register, $con$$constant); - %} - - ins_pipe(ialu_cconly_reg_imm); -%} - -instruct compUL_reg_con(flagsRegU xcc, iRegL op1, aimmL con) %{ - match(Set xcc (CmpUL op1 con)); - effect(DEF xcc, USE op1, USE con); - - size(8); - format %{ "CMP $op1,$con\t\t! 
unsigned long" %} - ins_encode %{ - __ cmp($op1$$Register, $con$$constant); - %} - ins_pipe(ialu_cconly_reg_imm); -%} -#else instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{ match(Set xcc (CmpL op1 op2)); effect( DEF xcc, USE op1, USE op2 ); @@ -10749,7 +7921,6 @@ ins_pipe(ialu_cconly_reg_reg); %} -#endif /* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */ /* match(Set xcc (CmpL (AndL op1 op2) zero)); */ @@ -10857,13 +8028,6 @@ match(Set icc (CmpF src1 src2)); effect(KILL fcc); -#ifdef AARCH64 - size(4); - format %{ "FCMP_s $src1,$src2" %} - ins_encode %{ - __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister); - %} -#else size(8); format %{ "FCMPs $src1,$src2\n\t" "FMSTAT" %} @@ -10871,7 +8035,6 @@ __ fcmps($src1$$FloatRegister, $src2$$FloatRegister); __ fmstat(); %} -#endif ins_pipe(faddF_fcc_reg_reg_zero); %} @@ -10879,13 +8042,6 @@ match(Set icc (CmpF src1 src2)); effect(KILL fcc); -#ifdef AARCH64 - size(4); - format %{ "FCMP0_s $src1" %} - ins_encode %{ - __ fcmp0_s($src1$$FloatRegister); - %} -#else size(8); format %{ "FCMPs $src1,$src2\n\t" "FMSTAT" %} @@ -10893,7 +8049,6 @@ __ fcmpzs($src1$$FloatRegister); __ fmstat(); %} -#endif ins_pipe(faddF_fcc_reg_reg_zero); %} @@ -10901,13 +8056,6 @@ match(Set icc (CmpD src1 src2)); effect(KILL fcc); -#ifdef AARCH64 - size(4); - format %{ "FCMP_d $src1,$src2" %} - ins_encode %{ - __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister); - %} -#else size(8); format %{ "FCMPd $src1,$src2 \n\t" "FMSTAT" %} @@ -10915,21 +8063,13 @@ __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister); __ fmstat(); %} -#endif ins_pipe(faddD_fcc_reg_reg_zero); %} instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{ match(Set icc (CmpD src1 src2)); effect(KILL fcc); - -#ifdef AARCH64 - size(8); - format %{ "FCMP0_d $src1" %} - ins_encode %{ - __ fcmp0_d($src1$$FloatRegister); - %} -#else + size(8); format %{ "FCMPZd $src1,$src2 \n\t" "FMSTAT" %} @@ -10937,87 +8077,9 @@ __ fcmpzd($src1$$FloatRegister); __ fmstat(); %} -#endif ins_pipe(faddD_fcc_reg_reg_zero); %} -#ifdef AARCH64 -// Compare floating, generate -1,0,1 -instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg icc) %{ - match(Set dst (CmpF3 src1 src2)); - // effect(KILL fcc); // nobody cares if flagsRegF is killed - effect(KILL icc); - ins_cost(DEFAULT_COST*3); // FIXME - size(12); - format %{ "FCMP_s $src1,$src2\n\t" - "CSET $dst, gt\n\t" - "CSINV $dst, $dst, ZR, ge" %} - ins_encode %{ - Register dst = $dst$$Register; - __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister); - __ cset(dst, gt); // 1 if '>', else 0 - __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 - %} - ins_pipe( floating_cmp ); // FIXME -%} - -// Compare floating, generate -1,0,1 -instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg icc) %{ - match(Set dst (CmpD3 src1 src2)); - // effect(KILL fcc); // nobody cares if flagsRegF is killed - effect(KILL icc); - ins_cost(DEFAULT_COST*3); // FIXME - size(12); - format %{ "FCMP_d $src1,$src2\n\t" - "CSET $dst, gt\n\t" - "CSINV $dst, $dst, ZR, ge" %} - ins_encode %{ - Register dst = $dst$$Register; - __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister); - __ cset(dst, gt); // 1 if '>', else 0 - __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 - %} - ins_pipe( floating_cmp ); // FIXME -%} - -// Compare floating, generate -1,0,1 -instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsReg icc) %{ - match(Set dst (CmpF3 src1 src2)); - // effect(KILL fcc); // nobody cares if flagsRegF is 
killed - effect(KILL icc); - ins_cost(DEFAULT_COST*3); // FIXME - size(12); - format %{ "FCMP0_s $src1\n\t" - "CSET $dst, gt\n\t" - "CSINV $dst, $dst, ZR, ge" %} - ins_encode %{ - Register dst = $dst$$Register; - __ fcmp0_s($src1$$FloatRegister); - __ cset(dst, gt); // 1 if '>', else 0 - __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 - %} - ins_pipe( floating_cmp ); // FIXME -%} - -// Compare floating, generate -1,0,1 -instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsReg icc) %{ - match(Set dst (CmpD3 src1 src2)); - // effect(KILL fcc); // nobody cares if flagsRegF is killed - effect(KILL icc); - ins_cost(DEFAULT_COST*3); // FIXME - size(12); - format %{ "FCMP0_d $src1\n\t" - "CSET $dst, gt\n\t" - "CSINV $dst, $dst, ZR, ge" %} - ins_encode %{ - Register dst = $dst$$Register; - __ fcmp0_d($src1$$FloatRegister); - __ cset(dst, gt); // 1 if '>', else 0 - __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 - %} - ins_pipe( floating_cmp ); // FIXME -%} -#else // Compare floating, generate -1,0,1 instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{ match(Set dst (CmpF3 src1 src2)); @@ -11094,7 +8156,6 @@ %} ins_pipe( floating_cmp ); %} -#endif // !AARCH64 //----------Branches--------------------------------------------------------- // Jump @@ -11176,61 +8237,6 @@ %} #endif -#ifdef AARCH64 -instruct cbzI(cmpOp cmp, iRegI op1, immI0 op2, label labl) %{ - match(If cmp (CmpI op1 op2)); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || - _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); - size(4); - ins_cost(BRANCH_COST); - format %{ "CB{N}Z $op1, $labl\t! int $cmp" %} - ins_encode %{ - if ($cmp$$cmpcode == eq) { - __ cbz_w($op1$$Register, *($labl$$label)); - } else { - __ cbnz_w($op1$$Register, *($labl$$label)); - } - %} - ins_pipe(br_cc); // FIXME -%} - -instruct cbzP(cmpOpP cmp, iRegP op1, immP0 op2, label labl) %{ - match(If cmp (CmpP op1 op2)); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || - _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); - size(4); - ins_cost(BRANCH_COST); - format %{ "CB{N}Z $op1, $labl\t! ptr $cmp" %} - ins_encode %{ - if ($cmp$$cmpcode == eq) { - __ cbz($op1$$Register, *($labl$$label)); - } else { - __ cbnz($op1$$Register, *($labl$$label)); - } - %} - ins_pipe(br_cc); // FIXME -%} - -instruct cbzL(cmpOpL cmp, iRegL op1, immL0 op2, label labl) %{ - match(If cmp (CmpL op1 op2)); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || - _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); - size(4); - ins_cost(BRANCH_COST); - format %{ "CB{N}Z $op1, $labl\t! long $cmp" %} - ins_encode %{ - if ($cmp$$cmpcode == eq) { - __ cbz($op1$$Register, *($labl$$label)); - } else { - __ cbnz($op1$$Register, *($labl$$label)); - } - %} - ins_pipe(br_cc); // FIXME -%} -#endif instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{ match(If cmp icc); @@ -11258,7 +8264,6 @@ ins_pipe(br_cc); %} -#ifndef AARCH64 instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{ match(If cmp xcc); effect(USE labl); @@ -11342,7 +8347,6 @@ %} ins_pipe(br_cc); %} -#endif instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{ match(CountedLoopEnd cmp icc); @@ -11390,26 +8394,6 @@ // Manifest a CmpL3 result in an integer register. Very painful. // This is the test to avoid. 
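The removed CmpF3/CmpD3 and CmpL3 sequences all manifest a -1/0/1 result the same way: CSET for "strictly greater", then CSINV to overwrite with -1 unless "greater or equal" held, so an unordered float compare (a NaN operand) also yields -1. Roughly, in C++:

    // 1 if a > b, 0 if a == b, -1 if a < b or either operand is NaN.
    int cmpf3(float a, float b) {
      int r = (a > b) ? 1 : 0;   // CSET  r, gt
      if (!(a >= b)) r = -1;     // CSINV r, r, ZR, ge  (keep r when ge, else -1)
      return r;
    }

    // Same shape for longs, where no unordered case exists.
    int cmpl3(long long a, long long b) {
      int r = (a > b) ? 1 : 0;
      if (!(a >= b)) r = -1;
      return r;
    }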
-#ifdef AARCH64 -instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr) %{ - match(Set dst (CmpL3 src1 src2)); - // effect(KILL fcc); // nobody cares if flagsRegF is killed - effect(KILL ccr); - ins_cost(DEFAULT_COST*3); // FIXME - size(12); - format %{ "CMP $src1,$src2\n\t" - "CSET $dst, gt\n\t" - "CSINV $dst, $dst, ZR, ge" %} - ins_encode %{ - Register dst = $dst$$Register; - __ cmp($src1$$Register, $src2$$Register); - __ cset(dst, gt); // 1 if '>', else 0 - __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 - %} - ins_pipe( ialu_cconly_reg_reg ); // FIXME -%} -// TODO cmpL3_reg_imm -#else instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{ match(Set dst (CmpL3 src1 src2) ); effect( KILL ccr ); @@ -11437,9 +8421,7 @@ %} ins_pipe(cmpL_reg); %} -#endif -#ifndef AARCH64 // Conditional move instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{ match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); @@ -11527,9 +8509,7 @@ %} ins_pipe(ialu_imm); %} -#endif // !AARCH64 -#ifndef AARCH64 instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); @@ -11568,9 +8548,7 @@ %} ins_pipe(ialu_reg); %} -#endif // !AARCH64 -#ifndef AARCH64 instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{ match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); @@ -11756,25 +8734,9 @@ %} ins_pipe(int_conditional_float_move); %} -#endif // !AARCH64 // ============================================================================ // Safepoint Instruction -#ifdef AARCH64 -instruct safePoint_poll(iRegP poll, flagsReg icc, RtempRegP tmp) %{ - match(SafePoint poll); - // The handler stub kills Rtemp - effect(USE poll, KILL tmp, KILL icc); - - size(4); - format %{ "LDR ZR,[$poll]\t! Safepoint: poll for GC" %} - ins_encode %{ - __ relocate(relocInfo::poll_type); - __ ldr(ZR, Address($poll$$Register)); - %} - ins_pipe(loadPollP); -%} -#else // rather than KILL R12, it would be better to use any reg as // TEMP. 
Can't do that at this point because it crashes the compiler instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{ @@ -11789,7 +8751,6 @@ %} ins_pipe(loadPollP); %} -#endif // ============================================================================ @@ -11838,13 +8799,8 @@ effect(USE meth); ins_cost(CALL_COST); format %{ "CALL,runtime" %} -#ifdef AARCH64 - ins_encode( save_last_PC, Java_To_Runtime( meth ), - call_epilog ); -#else ins_encode( Java_To_Runtime( meth ), call_epilog ); -#endif ins_pipe(simple_call); %} @@ -11970,11 +8926,7 @@ // Use the following format syntax format %{ "ShouldNotReachHere" %} ins_encode %{ -#ifdef AARCH64 - __ dpcs1(0xdead); -#else __ udf(0xdead); -#endif %} ins_pipe(tail_call); %} @@ -12004,49 +8956,21 @@ // ============================================================================ // inlined locking and unlocking -#ifdef AARCH64 -instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) -#else instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) -#endif %{ match(Set pcc (FastLock object box)); -#ifdef AARCH64 - effect(TEMP scratch, TEMP scratch2, TEMP scratch3); -#else effect(TEMP scratch, TEMP scratch2); -#endif ins_cost(100); -#ifdef AARCH64 - format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %} - ins_encode %{ - __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); - %} -#else format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register); %} -#endif ins_pipe(long_memory_op); %} -#ifdef AARCH64 -instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) %{ - match(Set pcc (FastUnlock object box)); - effect(TEMP scratch, TEMP scratch2, TEMP scratch3); - ins_cost(100); - - format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %} - ins_encode %{ - __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); - %} - ins_pipe(long_memory_op); -%} -#else instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{ match(Set pcc (FastUnlock object box)); effect(TEMP scratch, TEMP scratch2); @@ -12058,48 +8982,7 @@ %} ins_pipe(long_memory_op); %} -#endif -#ifdef AARCH64 -// TODO: add version that takes immI cnt? -instruct clear_array(iRegX cnt, iRegP base, iRegP ptr, iRegX temp, Universe dummy, flagsReg cpsr) %{ - match(Set dummy (ClearArray cnt base)); - effect(TEMP temp, TEMP ptr, KILL cpsr); - ins_cost(300); - format %{ - " MOV $temp,$cnt\n" - " ADD $ptr,$base,$cnt\n" - " SUBS $temp,$temp,16\t! Count down dword pair in bytes\n" - " B.lt done16\n" - "loop: STP ZR,ZR,[$ptr,-16]!\n" - " SUBS $temp,$temp,16\t! Count down dword pair in bytes\n" - " B.ge loop\t! Clearing loop\n" - "done16: ADDS $temp,$temp,8\t! Room for 1 more long?\n" - " B.lt done\n" - " STR ZR,[$base+$temp]\n" - "done:" - %} - ins_encode %{ - // TODO: preload? 
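The removed AArch64 ClearArray expansion sketched in the format block above zeroes the array from the end in 16-byte STP chunks and then, because the byte count is always a multiple of 8, covers a possible leftover word at the start with one plain store. As a C++ sketch (names invented, counts in bytes as in the original):

    #include <cstddef>
    #include <cstdint>

    void clear_array(uint64_t* base, size_t cnt_bytes) {   // cnt_bytes % 8 == 0
      char*     ptr  = (char*)base + cnt_bytes;            // ADD  ptr, base, cnt
      ptrdiff_t temp = (ptrdiff_t)cnt_bytes - 16;          // SUBS temp, temp, 16
      while (temp >= 0) {                                   // B.lt done16 / B.ge loop
        ptr -= 16;                                          // STP ZR, ZR, [ptr, -16]!
        ((uint64_t*)ptr)[0] = 0;
        ((uint64_t*)ptr)[1] = 0;
        temp -= 16;
      }
      if (temp + 8 >= 0) {                                  // ADDS temp, temp, 8 / B.lt done
        base[0] = 0;                                        // STR ZR, [base + temp]; temp == 0 here
      }
    }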
- __ mov($temp$$Register, $cnt$$Register); - __ add($ptr$$Register, $base$$Register, $cnt$$Register); - Label loop, done, done16; - __ subs($temp$$Register, $temp$$Register, 16); - __ b(done16, lt); - __ bind(loop); - __ stp(ZR, ZR, Address($ptr$$Register, -16, pre_indexed)); - __ subs($temp$$Register, $temp$$Register, 16); - __ b(loop, ge); - __ bind(done16); - __ adds($temp$$Register, $temp$$Register, 8); - __ b(done, lt); - // $temp should be 0 here - __ str(ZR, Address($base$$Register, $temp$$Register)); - __ bind(done); - %} - ins_pipe(long_memory_op); -%} -#else // Count and Base registers are fixed because the allocator cannot // kill unknown registers. The encodings are generic. instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{ @@ -12122,7 +9005,6 @@ %} ins_pipe(long_memory_op); %} -#endif #ifdef XXX // FIXME: Why R0/R1/R2/R3? @@ -12177,17 +9059,6 @@ ins_pipe(ialu_reg); %} -#ifdef AARCH64 -instruct countLeadingZerosL(iRegI dst, iRegL src) %{ - match(Set dst (CountLeadingZerosL src)); - size(4); - format %{ "CLZ $dst,$src" %} - ins_encode %{ - __ clz($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} -#else instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ match(Set dst (CountLeadingZerosL src)); effect(TEMP tmp, TEMP dst, KILL ccr); @@ -12204,7 +9075,6 @@ %} ins_pipe(ialu_reg); %} -#endif instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{ match(Set dst (CountTrailingZerosI src)); @@ -12219,20 +9089,6 @@ ins_pipe(ialu_reg); %} -#ifdef AARCH64 -instruct countTrailingZerosL(iRegI dst, iRegL src, iRegL tmp) %{ - match(Set dst (CountTrailingZerosL src)); - effect(TEMP tmp); - size(8); - format %{ "RBIT $tmp, $src\n\t" - "CLZ $dst,$tmp" %} - ins_encode %{ - __ rbit($tmp$$Register, $src$$Register); - __ clz($dst$$Register, $tmp$$Register); - %} - ins_pipe(ialu_reg); -%} -#else instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ match(Set dst (CountTrailingZerosL src)); effect(TEMP tmp, TEMP dst, KILL ccr); @@ -12253,37 +9109,10 @@ %} ins_pipe(ialu_reg); %} -#endif //---------- Population Count Instructions ------------------------------------- -#ifdef AARCH64 -instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountI src)); - effect(TEMP tmp); - size(20); - - format %{ "MOV_W $dst,$src\n\t" - "FMOV_dx $tmp,$dst\n\t" - "VCNT $tmp.8B,$tmp.8B\n\t" - "ADDV $tmp.B,$tmp.8B\n\t" - "FMRS $dst,$tmp" %} - - ins_encode %{ - __ mov_w($dst$$Register, $src$$Register); - __ fmov_dx($tmp$$FloatRegister, $dst$$Register); - int quad = 0; - int cnt_size = 0; // VELEM_SIZE_8 - __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size); - int add_size = 0; // VELEM_SIZE_8 - __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size); - __ fmrs($dst$$Register, $tmp$$FloatRegister); - %} - ins_pipe(ialu_reg); // FIXME -%} -#else instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI src)); @@ -12305,32 +9134,7 @@ %} ins_pipe(ialu_reg); // FIXME %} -#endif - -#ifdef AARCH64 -instruct popCountL(iRegI dst, iRegL src, regD tmp) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountL src)); - effect(TEMP tmp); - size(16); - - format %{ "FMOV_dx $tmp,$src\n\t" - "VCNT $tmp.8B,$tmp.8B\n\t" - "ADDV $tmp.B,$tmp.8B\n\t" - "FMOV_ws $dst,$tmp" %} - ins_encode %{ - __ fmov_dx($tmp$$FloatRegister, $src$$Register); - int quad = 0; - int cnt_size 
= 0; - __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size); - int add_size = 0; - __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size); - __ fmov_ws($dst$$Register, $tmp$$FloatRegister); - %} - ins_pipe(ialu_reg); // FIXME -%} -#else // Note: Long.bitCount(long) returns an int. instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{ predicate(UsePopCountInstruction); @@ -12356,7 +9160,6 @@ %} ins_pipe(ialu_reg); %} -#endif // ============================================================================ @@ -12368,26 +9171,13 @@ size(4); format %{ "REV32 $dst,$src" %} ins_encode %{ -#ifdef AARCH64 - __ rev_w($dst$$Register, $src$$Register); - // high 32 bits zeroed, not sign extended -#else __ rev($dst$$Register, $src$$Register); -#endif %} ins_pipe( iload_mem ); // FIXME %} instruct bytes_reverse_long(iRegL dst, iRegL src) %{ match(Set dst (ReverseBytesL src)); -#ifdef AARCH64 -//size(4); - format %{ "REV $dst,$src" %} - ins_encode %{ - __ rev($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); // FIXME -#else effect(TEMP dst); size(8); format %{ "REV $dst.lo,$src.lo\n\t" @@ -12397,45 +9187,25 @@ __ rev($dst$$Register->successor(), $src$$Register); %} ins_pipe( iload_mem ); // FIXME -#endif %} instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{ match(Set dst (ReverseBytesUS src)); -#ifdef AARCH64 - size(4); - format %{ "REV16_W $dst,$src" %} - ins_encode %{ - __ rev16_w($dst$$Register, $src$$Register); - // high 32 bits zeroed - %} -#else size(4); format %{ "REV16 $dst,$src" %} ins_encode %{ __ rev16($dst$$Register, $src$$Register); %} -#endif ins_pipe( iload_mem ); // FIXME %} instruct bytes_reverse_short(iRegI dst, iRegI src) %{ match(Set dst (ReverseBytesS src)); -#ifdef AARCH64 - size(8); - format %{ "REV16_W $dst,$src\n\t" - "SIGN_EXT16 $dst" %} - ins_encode %{ - __ rev16_w($dst$$Register, $src$$Register); - __ sign_extend($dst$$Register, $dst$$Register, 16); - %} -#else size(4); format %{ "REVSH $dst,$src" %} ins_encode %{ __ revsh($dst$$Register, $src$$Register); %} -#endif ins_pipe( iload_mem ); // FIXME %} @@ -12494,7 +9264,6 @@ ins_pipe(fstoreD_mem_reg); // FIXME %} -#ifndef AARCH64 // Replicate scalar to packed byte values in Double register instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 8); @@ -12516,7 +9285,6 @@ %} ins_pipe(ialu_reg); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar to packed byte values in Double register instruct Repl8B_reg_simd(vecD dst, iRegI src) %{ @@ -12548,7 +9316,6 @@ ins_pipe(ialu_reg); // FIXME %} -#ifndef AARCH64 // Replicate scalar constant to packed byte values in Double register instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 8); @@ -12562,7 +9329,6 @@ ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) ); ins_pipe(loadConFD); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar constant to packed byte values in Double register // TODO: support negative constants with MVNI? 
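Two bit-counting idioms appear in this stretch: count-trailing-zeros as a bit-reverse followed by count-leading-zeros (RBIT + CLZ), and population count as a per-byte count with a horizontal add across the vector lanes (VCNT + ADDV in the removed AArch64 forms). Equivalent scalar C++ sketches, with __builtin_clz standing in for CLZ:

    #include <cstdint>

    static uint32_t bit_reverse(uint32_t x) {   // what RBIT does
      uint32_t r = 0;
      for (int i = 0; i < 32; i++) { r = (r << 1) | (x & 1u); x >>= 1; }
      return r;
    }

    int ctz32(uint32_t x) {
      return (x == 0) ? 32 : __builtin_clz(bit_reverse(x));   // CLZ(RBIT(x))
    }

    int popcount64(uint64_t x) {
      int total = 0;
      for (int i = 0; i < 8; i++) {              // count the bits in each byte...
        uint8_t b = (uint8_t)(x >> (8 * i));
        while (b) { total += b & 1u; b >>= 1; }  // ...then sum the byte counts
      }
      return total;
    }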
@@ -12595,7 +9361,6 @@ ins_pipe(loadConFD); // FIXME %} -#ifndef AARCH64 // Replicate scalar to packed short/char values into Double register instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 4); @@ -12615,7 +9380,6 @@ %} ins_pipe(ialu_reg); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar to packed byte values in Double register instruct Repl4S_reg_simd(vecD dst, iRegI src) %{ @@ -12648,7 +9412,6 @@ %} -#ifndef AARCH64 // Replicate scalar constant to packed short/char values in Double register instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{ predicate(n->as_Vector()->length() == 4); @@ -12662,7 +9425,6 @@ ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) ); ins_pipe(loadConFD); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar constant to packed byte values in Double register instruct Repl4S_immU8(vecD dst, immU8 src) %{ @@ -12694,7 +9456,6 @@ ins_pipe(loadConFD); // FIXME %} -#ifndef AARCH64 // Replicate scalar to packed int values in Double register instruct Repl2I_reg(vecD dst, iRegI src) %{ predicate(n->as_Vector()->length() == 2); @@ -12725,7 +9486,6 @@ %} ins_pipe(ialu_reg); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar to packed int values in Double register instruct Repl2I_reg_simd(vecD dst, iRegI src) %{ @@ -12758,7 +9518,6 @@ %} -#ifndef AARCH64 // Replicate scalar zero constant to packed int values in Double register instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 2); @@ -12772,7 +9531,6 @@ ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) ); ins_pipe(loadConFD); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar constant to packed byte values in Double register instruct Repl2I_immU8(vecD dst, immU8 src) %{ @@ -12804,23 +9562,6 @@ ins_pipe(loadConFD); // FIXME %} -#ifdef AARCH64 -// Replicate scalar to packed byte values in Double register pair -instruct Repl2L_reg(vecX dst, iRegL src) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateL src)); - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "VDUP.2D $dst.Q,$src\t" %} - ins_encode %{ - bool quad = true; - __ vdupI($dst$$FloatRegister, $src$$Register, - MacroAssembler::VELEM_SIZE_64, quad); - %} - ins_pipe(ialu_reg); // FIXME -%} -#else /* !AARCH64 */ // Replicate scalar to packed byte values in Double register pair instruct Repl2L_reg(vecX dst, iRegL src) %{ predicate(n->as_Vector()->length() == 2); @@ -12865,7 +9606,6 @@ Repl2F_regI(dst,tmp); %} %} -#endif /* !AARCH64 */ // Replicate scalar to packed float values in Double register instruct Repl2F_reg_simd(vecD dst, regF src) %{ @@ -12882,7 +9622,6 @@ ins_pipe(ialu_reg); // FIXME %} -#ifndef AARCH64 // Replicate scalar to packed float values in Double register pair instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 4); @@ -12902,7 +9641,6 @@ %} ins_pipe(ialu_reg); // FIXME %} -#endif /* !AARCH64 */ // Replicate scalar to packed float values in Double register pair instruct Repl4F_reg_simd(vecX dst, regF src) %{ @@ -12919,7 +9657,6 @@ ins_pipe(ialu_reg); // FIXME %} -#ifndef AARCH64 // Replicate scalar zero constant to packed float values in Double register instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{ predicate(n->as_Vector()->length() == 2); @@ -12933,22 +9670,9 @@ ins_encode( LdReplImmF(src, dst, tmp) ); ins_pipe(loadConFD); // FIXME %} -#endif /* !AAARCH64 */ // Replicate scalar to packed double float values in Double register pair instruct Repl2D_reg(vecX dst, regD src) %{ 
-#ifdef AARCH64 - predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); - match(Set dst (ReplicateD src)); - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "VDUP $dst.2D,$src\t" %} - ins_encode %{ - bool quad = true; - __ vdupD($dst$$FloatRegister, $src$$FloatRegister, quad); - %} -#else predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); size(4*2); @@ -12963,7 +9687,6 @@ FloatRegister dstb = dsta->successor()->successor(); __ fcpyd(dstb, src); %} -#endif ins_pipe(ialu_reg); // FIXME %} @@ -13080,7 +9803,6 @@ ins_pipe( faddD_reg_reg ); // FIXME %} -#ifndef AARCH64 instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); match(Set dst (AddVF src1 src2)); @@ -13098,7 +9820,6 @@ ins_pipe(faddF_reg_reg); // FIXME %} -#endif instruct vadd4F_reg_simd(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant()); @@ -13113,20 +9834,6 @@ ins_pipe( faddD_reg_reg ); // FIXME %} -#ifdef AARCH64 -instruct vadd2D_reg_simd(vecX dst, vecX src1, vecX src2) %{ - predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); - match(Set dst (AddVD src1 src2)); - size(4); - format %{ "VADD.F64 $dst.Q,$src1.Q,$src2.Q\t! add packed2D" %} - ins_encode %{ - bool quad = true; - __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F64, quad); - %} - ins_pipe( faddD_reg_reg ); // FIXME -%} -#else instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); match(Set dst (AddVF src1 src2)); @@ -13182,7 +9889,6 @@ ins_pipe(faddF_reg_reg); // FIXME %} -#endif // Bytes vector sub @@ -13294,7 +10000,6 @@ ins_pipe( faddF_reg_reg ); // FIXME %} -#ifndef AARCH64 instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); match(Set dst (SubVF src1 src2)); @@ -13317,7 +10022,6 @@ ins_pipe(faddF_reg_reg); // FIXME %} -#endif instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ @@ -13333,20 +10037,6 @@ ins_pipe( faddF_reg_reg ); // FIXME %} -#ifdef AARCH64 -instruct vsub2D_reg_simd(vecX dst, vecX src1, vecX src2) %{ - predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); - match(Set dst (SubVD src1 src2)); - size(4); - format %{ "VSUB.F64 $dst.Q,$src1.Q,$src2.Q\t! 
add packed2D" %} - ins_encode %{ - bool quad = true; - __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F64, quad); - %} - ins_pipe( faddD_reg_reg ); // FIXME -%} -#else instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); match(Set dst (SubVF src1 src2)); @@ -13402,7 +10092,6 @@ ins_pipe(faddF_reg_reg); // FIXME %} -#endif // Shorts/Chars vector mul instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ @@ -13467,7 +10156,6 @@ ins_pipe( fmulF_reg_reg ); // FIXME %} -#ifndef AARCH64 instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); match(Set dst (MulVF src1 src2)); @@ -13485,7 +10173,6 @@ ins_pipe(fmulF_reg_reg); // FIXME %} -#endif instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant()); @@ -13499,7 +10186,6 @@ ins_pipe( fmulF_reg_reg ); // FIXME %} -#ifndef AARCH64 instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); match(Set dst (MulVF src1 src2)); @@ -13532,25 +10218,7 @@ ins_pipe(fmulF_reg_reg); // FIXME %} -#endif - -#ifdef AARCH64 -instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); - match(Set dst (MulVD src1 src2)); - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "FMUL.2D $dst,$src1,$src2\t! double[2]" %} - ins_encode %{ - int quad = 1; - __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F64, quad); - %} - ins_pipe(fdivF_reg_reg); // FIXME -%} -#else instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); @@ -13572,26 +10240,12 @@ ins_pipe(fmulD_reg_reg); // FIXME %} -#endif // Floats vector div instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (DivVF src1 src2)); -#ifdef AARCH64 - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "FDIV.2S $dst,$src1,$src2\t! float[2]" %} - ins_encode %{ - int quad = 0; - __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F32, quad); - %} - - ins_pipe(fdivF_reg_reg); // FIXME -#else size(4*2); ins_cost(DEFAULT_COST*2); // FIXME @@ -13605,25 +10259,11 @@ %} ins_pipe(fdivF_reg_reg); // FIXME -#endif %} instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); -#ifdef AARCH64 - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "FDIV.4S $dst,$src1,$src2\t! float[4]" %} - ins_encode %{ - int quad = 1; - __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F32, quad); - %} - - ins_pipe(fdivF_reg_reg); // FIXME -#else size(4*4); ins_cost(DEFAULT_COST*4); // FIXME @@ -13652,26 +10292,8 @@ %} ins_pipe(fdivF_reg_reg); // FIXME -#endif %} -#ifdef AARCH64 -instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); - match(Set dst (DivVD src1 src2)); - size(4*1); - ins_cost(DEFAULT_COST*1); // FIXME - - format %{ "FDIV.2D $dst,$src1,$src2\t! 
double[2]" %} - ins_encode %{ - int quad = 1; - __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, - MacroAssembler::VFA_SIZE_F64, quad); - %} - - ins_pipe(fdivF_reg_reg); // FIXME -%} -#else instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); @@ -13693,7 +10315,6 @@ ins_pipe(fdivD_reg_reg); // FIXME %} -#endif // --------------------------------- NEG -------------------------------------- --- old/src/hotspot/cpu/arm/assembler_arm.hpp 2018-09-17 10:29:31.154926871 -0400 +++ new/src/hotspot/cpu/arm/assembler_arm.hpp 2018-09-17 10:29:30.515889546 -0400 @@ -40,29 +40,14 @@ lsl, lsr, asr, ror }; -#ifdef AARCH64 -enum AsmExtendOp { - ex_uxtb, ex_uxth, ex_uxtw, ex_uxtx, - ex_sxtb, ex_sxth, ex_sxtw, ex_sxtx, - - ex_lsl = ex_uxtx -}; -#endif enum AsmOffset { -#ifdef AARCH64 - basic_offset = 0b00, - pre_indexed = 0b11, - post_indexed = 0b01 -#else basic_offset = 1 << 24, pre_indexed = 1 << 24 | 1 << 21, post_indexed = 0 -#endif }; -#ifndef AARCH64 enum AsmWriteback { no_writeback, writeback @@ -72,7 +57,6 @@ sub_offset = 0, add_offset = 1 }; -#endif // ARM Addressing Modes 2 and 3 - Load and store @@ -84,21 +68,13 @@ AsmOffset _mode; RelocationHolder _rspec; int _shift_imm; -#ifdef AARCH64 - AsmExtendOp _extend; -#else AsmShift _shift; AsmOffsetOp _offset_op; static inline int abs(int x) { return x < 0 ? -x : x; } static inline int up (int x) { return x < 0 ? 0 : 1; } -#endif -#ifdef AARCH64 - static const AsmExtendOp LSL = ex_lsl; -#else static const AsmShift LSL = lsl; -#endif public: Address() : _base(noreg) {} @@ -109,12 +85,8 @@ _disp = offset; _mode = mode; _shift_imm = 0; -#ifdef AARCH64 - _extend = ex_lsl; -#else _shift = lsl; _offset_op = add_offset; -#endif } #ifdef ASSERT @@ -124,27 +96,11 @@ _disp = in_bytes(offset); _mode = mode; _shift_imm = 0; -#ifdef AARCH64 - _extend = ex_lsl; -#else _shift = lsl; _offset_op = add_offset; -#endif } #endif -#ifdef AARCH64 - Address(Register rn, Register rm, AsmExtendOp extend = ex_lsl, int shift_imm = 0) { - assert ((extend == ex_uxtw) || (extend == ex_lsl) || (extend == ex_sxtw) || (extend == ex_sxtx), "invalid extend for address mode"); - assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range"); - _base = rn; - _index = rm; - _disp = 0; - _mode = basic_offset; - _extend = extend; - _shift_imm = shift_imm; - } -#else Address(Register rn, Register rm, AsmShift shift = lsl, int shift_imm = 0, AsmOffset mode = basic_offset, AsmOffsetOp offset_op = add_offset) { @@ -181,7 +137,6 @@ _mode = basic_offset; _offset_op = add_offset; } -#endif // AARCH64 // [base + index * wordSize] static Address indexed_ptr(Register base, Register index) { @@ -211,25 +166,6 @@ return a; } -#ifdef AARCH64 - int encoding_simd() const { - assert(_index != SP, "encoding constraint"); - assert(_disp == 0 || _mode == post_indexed, "encoding constraint"); - assert(_index == noreg || _mode == basic_offset, "encoding constraint"); - assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint"); - assert(_extend == ex_lsl, "encoding constraint"); - int index; - if (_index == noreg) { - if (_mode == post_indexed) - index = 0b100 << 5 | 31; - else - index = 0; - } else { - index = 0b100 << 5 | _index->encoding(); - } - return index << 16 | _base->encoding_with_sp() << 5; - } -#else /* !AARCH64 */ int encoding2() const { assert(_mode == basic_offset || _base != PC, "unpredictable instruction"); if (_index == noreg) { @@ -287,7 +223,6 @@ return 
_base->encoding() << 16 | index; } -#endif // !AARCH64 Register base() const { return _base; @@ -309,11 +244,6 @@ return _shift_imm; } -#ifdef AARCH64 - AsmExtendOp extend() const { - return _extend; - } -#else AsmShift shift() const { return _shift; } @@ -321,7 +251,6 @@ AsmOffsetOp offset_op() const { return _offset_op; } -#endif bool uses(Register reg) const { return _base == reg || _index == reg; } @@ -394,11 +323,7 @@ }; #endif -#ifdef AARCH64 -#include "assembler_arm_64.hpp" -#else #include "assembler_arm_32.hpp" -#endif #endif // CPU_ARM_VM_ASSEMBLER_ARM_HPP --- old/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp 2018-09-17 10:29:32.732018985 -0400 +++ new/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp 2018-09-17 10:29:32.097981952 -0400 @@ -67,9 +67,6 @@ __ bind(_entry); if (_info->deoptimize_on_exception()) { -#ifdef AARCH64 - __ NOT_TESTED(); -#endif __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type); ce->add_call_info_here(_info); ce->verify_oop_map(_info); @@ -86,9 +83,6 @@ } if (_throw_index_out_of_bounds_exception) { -#ifdef AARCH64 - __ NOT_TESTED(); -#endif __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type); } else { __ str(_array->as_pointer_register(), Address(SP, BytesPerWord)); // ??? Correct offset? Correct instruction? @@ -208,16 +202,12 @@ const Register lock_reg = _lock_reg->as_pointer_register(); ce->verify_reserved_argument_area_size(2); -#ifdef AARCH64 - __ stp(obj_reg, lock_reg, Address(SP)); -#else if (obj_reg < lock_reg) { __ stmia(SP, RegisterSet(obj_reg) | RegisterSet(lock_reg)); } else { __ str(obj_reg, Address(SP)); __ str(lock_reg, Address(SP, BytesPerWord)); } -#endif // AARCH64 Runtime1::StubID enter_id = ce->compilation()->has_fpu_code() ? Runtime1::monitorenter_id : @@ -259,7 +249,7 @@ } void PatchingStub::emit_code(LIR_Assembler* ce) { - const int patchable_instruction_offset = AARCH64_ONLY(NativeInstruction::instruction_size) NOT_AARCH64(0); + const int patchable_instruction_offset = 0; assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call"); @@ -267,31 +257,17 @@ Label call_patch; bool is_load = (_id == load_klass_id) || (_id == load_mirror_id) || (_id == load_appendix_id); -#ifdef AARCH64 - assert(nativeInstruction_at(_pc_start)->is_nop(), "required for MT safe patching"); - // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned. - __ align(wordSize); -#endif // AARCH64 - - if (is_load NOT_AARCH64(&& !VM_Version::supports_movw())) { + if (is_load && !VM_Version::supports_movw()) { address start = __ pc(); // The following sequence duplicates code provided in MacroAssembler::patchable_mov_oop() // without creating relocation info entry. -#ifdef AARCH64 - // Extra nop for MT safe patching - __ nop(); -#endif // AARCH64 assert((__ pc() - start) == patchable_instruction_offset, "should be"); -#ifdef AARCH64 - __ ldr(_obj, __ pc()); -#else __ ldr(_obj, Address(PC)); // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 
__ nop(); -#endif // AARCH64 #ifdef ASSERT for (int i = 0; i < _bytes_to_copy; i++) { --- old/src/hotspot/cpu/arm/c1_Defs_arm.hpp 2018-09-17 10:29:34.295110281 -0400 +++ new/src/hotspot/cpu/arm/c1_Defs_arm.hpp 2018-09-17 10:29:33.665073482 -0400 @@ -47,9 +47,9 @@ // registers enum { - pd_nof_cpu_regs_frame_map = AARCH64_ONLY(33) NOT_AARCH64(16), // number of registers used during code emission - pd_nof_caller_save_cpu_regs_frame_map = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers killed by calls - pd_nof_cpu_regs_reg_alloc = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled) + pd_nof_cpu_regs_frame_map = 16, // number of registers used during code emission + pd_nof_caller_save_cpu_regs_frame_map = 10, // number of registers killed by calls + pd_nof_cpu_regs_reg_alloc = 10, // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled) pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map, // number of registers visible to linear scan pd_nof_cpu_regs_processed_in_linearscan = pd_nof_cpu_regs_reg_alloc + 1, // number of registers processed in linear scan; includes LR as it is used as temporary register in c1_LIRGenerator_arm pd_first_cpu_reg = 0, @@ -57,7 +57,7 @@ pd_nof_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers used during code emission pd_nof_caller_save_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers killed by calls - pd_nof_fpu_regs_reg_alloc = AARCH64_ONLY(32) NOT_AARCH64(VFP(30) SOFT(0)), // number of float registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = VFP(30) SOFT(0), // number of float registers that are visible to register allocator pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1, @@ -74,11 +74,7 @@ pd_float_saved_as_double = false }; -#ifdef AARCH64 -#define PATCHED_ADDR 0xff8 -#else #define PATCHED_ADDR (204) -#endif #define CARDTABLEBARRIERSET_POST_BARRIER_HELPER #define GENERATE_ADDRESS_IS_PREFERRED --- old/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp 2018-09-17 10:29:35.853201286 -0400 +++ new/src/hotspot/cpu/arm/c1_FrameMap_arm.cpp 2018-09-17 10:29:35.217164136 -0400 @@ -49,9 +49,6 @@ LIR_Opr FrameMap::R4_metadata_opr; LIR_Opr FrameMap::R5_metadata_opr; -#ifdef AARCH64 -LIR_Opr FrameMap::ZR_opr; -#endif // AARCH64 LIR_Opr FrameMap::LR_opr; LIR_Opr FrameMap::LR_oop_opr; @@ -82,12 +79,7 @@ } else if (r_1->is_Register()) { Register reg = r_1->as_Register(); if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { -#ifdef AARCH64 - assert(r_1->next() == r_2, "should be the same"); - opr = as_long_opr(reg); -#else opr = as_long_opr(reg, r_2->as_Register()); -#endif } else if (type == T_OBJECT || type == T_ARRAY) { opr = as_oop_opr(reg); } else if (type == T_METADATA) { @@ -115,20 +107,10 @@ int rnum = 0; // Registers used for allocation -#ifdef AARCH64 - assert(Rthread == R28 && Rheap_base == R27 && Rtemp == R16, "change the code here"); - for (i = 0; i < 16; i++) { - map_register(rnum++, as_Register(i)); - } - for (i = 17; i < 28; i++) { - map_register(rnum++, as_Register(i)); - } -#else assert(Rthread == R10 && Rtemp == R12, "change the code here"); for (i = 0; i < 10; i++) { map_register(rnum++, 
as_Register(i)); } -#endif // AARCH64 assert(rnum == pd_nof_cpu_regs_reg_alloc, "should be"); // Registers not used for allocation @@ -139,11 +121,7 @@ map_register(rnum++, Rthread); map_register(rnum++, FP); // ARM32: R7 or R11 map_register(rnum++, SP); -#ifdef AARCH64 - map_register(rnum++, ZR); -#else map_register(rnum++, PC); -#endif assert(rnum == pd_nof_cpu_regs_frame_map, "should be"); _init_done = true; @@ -155,9 +133,6 @@ R4_opr = as_opr(R4); R4_oop_opr = as_oop_opr(R4); R4_metadata_opr = as_metadata_opr(R4); R5_opr = as_opr(R5); R5_oop_opr = as_oop_opr(R5); R5_metadata_opr = as_metadata_opr(R5); -#ifdef AARCH64 - ZR_opr = as_opr(ZR); -#endif // AARCH64 LR_opr = as_opr(LR); LR_oop_opr = as_oop_opr(LR); @@ -169,11 +144,6 @@ // LIR operands for result Int_result_opr = R0_opr; Object_result_opr = R0_oop_opr; -#ifdef AARCH64 - Long_result_opr = as_long_opr(R0); - Float_result_opr = as_float_opr(S0); - Double_result_opr = as_double_opr(D0); -#else Long_result_opr = as_long_opr(R0, R1); #ifdef __ABI_HARD__ Float_result_opr = as_float_opr(S0); @@ -182,7 +152,6 @@ Float_result_opr = LIR_OprFact::single_softfp(0); Double_result_opr = LIR_OprFact::double_softfp(0, 1); #endif // __ABI_HARD__ -#endif // AARCH64 Exception_oop_opr = as_oop_opr(Rexception_obj); Exception_pc_opr = as_opr(Rexception_pc); @@ -222,7 +191,7 @@ } java_index += type2size[opr->type()]; } - return max_offset < AARCH64_ONLY(16384) NOT_AARCH64(4096); // TODO-AARCH64 check that LIRAssembler does not generate load/store of byte and half-word with SP as address base + return max_offset < 4096; } VMReg FrameMap::fpu_regname(int n) { --- old/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp 2018-09-17 10:29:37.422292933 -0400 +++ new/src/hotspot/cpu/arm/c1_FrameMap_arm.hpp 2018-09-17 10:29:36.786255783 -0400 @@ -54,9 +54,6 @@ static LIR_Opr R4_metadata_opr; static LIR_Opr R5_metadata_opr; -#ifdef AARCH64 - static LIR_Opr ZR_opr; -#endif // AARCH64 static LIR_Opr LR_opr; static LIR_Opr LR_oop_opr; @@ -75,19 +72,6 @@ static LIR_Opr Exception_oop_opr; static LIR_Opr Exception_pc_opr; -#ifdef AARCH64 - static LIR_Opr as_long_opr(Register r) { - return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); - } - - static LIR_Opr as_pointer_opr(Register r) { - return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); - } - - static LIR_Opr as_double_opr(FloatRegister r) { - return LIR_OprFact::double_fpu(r->encoding()); - } -#else static LIR_Opr as_long_opr(Register r, Register r2) { return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2)); } @@ -99,7 +83,6 @@ static LIR_Opr as_double_opr(FloatRegister r) { return LIR_OprFact::double_fpu(r->encoding(), r->successor()->encoding()); } -#endif static LIR_Opr as_float_opr(FloatRegister r) { return LIR_OprFact::single_fpu(r->encoding()); --- old/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2018-09-17 10:29:38.995384813 -0400 +++ new/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp 2018-09-17 10:29:38.361347781 -0400 @@ -127,9 +127,6 @@ Address LIR_Assembler::as_Address(LIR_Address* addr) { Register base = addr->base()->as_pointer_register(); -#ifdef AARCH64 - int align = exact_log2(type2aelembytes(addr->type(), true)); -#endif if (addr->index()->is_illegal() || addr->index()->is_constant()) { int offset = addr->disp(); @@ -137,16 +134,9 @@ offset += addr->index()->as_constant_ptr()->as_jint() << addr->scale(); } -#ifdef AARCH64 - if (!Assembler::is_unsigned_imm_in_range(offset, 12, align) && !Assembler::is_imm_in_range(offset, 9, 0)) { - BAILOUT_("offset not in range", Address(base)); - } 
- assert(UseUnalignedAccesses || (offset & right_n_bits(align)) == 0, "offset should be aligned"); -#else if ((offset <= -4096) || (offset >= 4096)) { BAILOUT_("offset not in range", Address(base)); } -#endif // AARCH64 return Address(base, offset); @@ -154,44 +144,21 @@ assert(addr->disp() == 0, "can't have both"); int scale = addr->scale(); -#ifdef AARCH64 - assert((scale == 0) || (scale == align), "scale should be zero or equal to embedded shift"); - - bool is_index_extended = (addr->index()->type() == T_INT); - if (is_index_extended) { - assert(addr->index()->is_single_cpu(), "should be"); - return Address(base, addr->index()->as_register(), ex_sxtw, scale); - } else { - assert(addr->index()->is_double_cpu(), "should be"); - return Address(base, addr->index()->as_register_lo(), ex_lsl, scale); - } -#else assert(addr->index()->is_single_cpu(), "should be"); return scale >= 0 ? Address(base, addr->index()->as_register(), lsl, scale) : Address(base, addr->index()->as_register(), lsr, -scale); -#endif // AARCH64 } } Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { -#ifdef AARCH64 - ShouldNotCallThis(); // Not used on AArch64 - return Address(); -#else Address base = as_Address(addr); assert(base.index() == noreg, "must be"); if (base.disp() + BytesPerWord >= 4096) { BAILOUT_("offset not in range", Address(base.base(),0)); } return Address(base.base(), base.disp() + BytesPerWord); -#endif // AARCH64 } Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { -#ifdef AARCH64 - ShouldNotCallThis(); // Not used on AArch64 - return Address(); -#else return as_Address(addr); -#endif // AARCH64 } @@ -327,13 +294,8 @@ int offset = code_offset(); __ mov_relative_address(LR, __ pc()); -#ifdef AARCH64 - __ raw_push(LR, LR); - __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, Rtemp); -#else __ push(LR); // stub expects LR to be saved __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg); -#endif // AARCH64 assert(code_offset() - offset <= deopt_handler_size(), "overflow"); __ end_a_stub(); @@ -347,7 +309,6 @@ __ remove_frame(initial_frame_size_in_bytes()); // mov_slow here is usually one or two instruction - // TODO-AARCH64 3 instructions on AArch64, so try to load polling page by ldr_literal __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference); __ relocate(relocInfo::poll_return_type); __ ldr(Rtemp, Address(Rtemp)); @@ -386,12 +347,8 @@ case T_LONG: assert(patch_code == lir_patch_none, "no patching handled here"); -#ifdef AARCH64 - __ mov_slow(dest->as_pointer_register(), (intptr_t)c->as_jlong()); -#else __ mov_slow(dest->as_register_lo(), c->as_jint_lo()); __ mov_slow(dest->as_register_hi(), c->as_jint_hi()); -#endif // AARCH64 break; case T_OBJECT: @@ -414,12 +371,8 @@ if (dest->is_single_fpu()) { __ mov_float(dest->as_float_reg(), c->as_jfloat()); } else { -#ifdef AARCH64 - ShouldNotReachHere(); -#else // Simple getters can return float constant directly into r0 __ mov_slow(dest->as_register(), c->as_jint_bits()); -#endif // AARCH64 } break; @@ -427,13 +380,9 @@ if (dest->is_double_fpu()) { __ mov_double(dest->as_double_reg(), c->as_jdouble()); } else { -#ifdef AARCH64 - ShouldNotReachHere(); -#else // Simple getters can return double constant directly into r1r0 __ mov_slow(dest->as_register_lo(), c->as_jint_lo_bits()); __ mov_slow(dest->as_register_hi(), c->as_jint_hi_bits()); -#endif // AARCH64 } break; @@ -466,17 +415,12 @@ case T_LONG: // fall through case T_DOUBLE: -#ifdef AARCH64 - __ 
mov_slow(Rtemp, c->as_jlong_bits()); - __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix())); -#else __ mov_slow(Rtemp, c->as_jint_lo_bits()); __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes)); if (c->as_jint_hi_bits() != c->as_jint_lo_bits()) { __ mov_slow(Rtemp, c->as_jint_hi_bits()); } __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); -#endif // AARCH64 break; default: @@ -486,49 +430,14 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { -#ifdef AARCH64 - assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL) || - (src->as_constant_ptr()->type() == T_INT && src->as_constant_ptr()->as_jint() == 0) || - (src->as_constant_ptr()->type() == T_LONG && src->as_constant_ptr()->as_jlong() == 0) || - (src->as_constant_ptr()->type() == T_FLOAT && src->as_constant_ptr()->as_jint_bits() == 0) || - (src->as_constant_ptr()->type() == T_DOUBLE && src->as_constant_ptr()->as_jlong_bits() == 0), - "cannot handle otherwise"); - assert(dest->as_address_ptr()->type() == type, "should be"); - - Address addr = as_Address(dest->as_address_ptr()); - int null_check_offset = code_offset(); - switch (type) { - case T_OBJECT: // fall through - case T_ARRAY: - if (UseCompressedOops && !wide) { - __ str_w(ZR, addr); - } else { - __ str(ZR, addr); - } - break; - case T_ADDRESS: // fall through - case T_DOUBLE: // fall through - case T_LONG: __ str(ZR, addr); break; - case T_FLOAT: // fall through - case T_INT: __ str_w(ZR, addr); break; - case T_BOOLEAN: // fall through - case T_BYTE: __ strb(ZR, addr); break; - case T_CHAR: // fall through - case T_SHORT: __ strh(ZR, addr); break; - default: ShouldNotReachHere(); - } -#else assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL),"cannot handle otherwise"); __ mov(Rtemp, 0); int null_check_offset = code_offset(); __ str(Rtemp, as_Address(dest->as_address_ptr())); -#endif // AARCH64 if (info != NULL) { -#ifndef AARCH64 assert(false, "arm32 didn't support this before, investigate if bug"); -#endif add_debug_info_for_null_check(null_check_offset, info); } } @@ -539,27 +448,17 @@ if (src->is_single_cpu()) { if (dest->is_single_cpu()) { move_regs(src->as_register(), dest->as_register()); -#ifdef AARCH64 - } else if (dest->is_double_cpu()) { - assert ((src->type() == T_OBJECT) || (src->type() == T_ARRAY) || (src->type() == T_ADDRESS), "invalid src type"); - move_regs(src->as_register(), dest->as_register_lo()); -#else } else if (dest->is_single_fpu()) { __ fmsr(dest->as_float_reg(), src->as_register()); -#endif // AARCH64 } else { ShouldNotReachHere(); } } else if (src->is_double_cpu()) { -#ifdef AARCH64 - move_regs(src->as_register_lo(), dest->as_register_lo()); -#else if (dest->is_double_cpu()) { __ long_move(dest->as_register_lo(), dest->as_register_hi(), src->as_register_lo(), src->as_register_hi()); } else { __ fmdrr(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi()); } -#endif // AARCH64 } else if (src->is_single_fpu()) { if (dest->is_single_fpu()) { __ mov_float(dest->as_float_reg(), src->as_float_reg()); @@ -572,11 +471,7 @@ if (dest->is_double_fpu()) { __ mov_double(dest->as_double_reg(), src->as_double_reg()); } else if (dest->is_double_cpu()) { -#ifdef AARCH64 - __ fmov_xd(dest->as_register_lo(), src->as_double_reg()); -#else __ fmrrd(dest->as_register_lo(), dest->as_register_hi(), 
src->as_double_reg()); -#endif // AARCH64 } else { ShouldNotReachHere(); } @@ -593,12 +488,10 @@ frame_map()->address_for_slot(dest->single_stack_ix()) : frame_map()->address_for_slot(dest->double_stack_ix()); -#ifndef AARCH64 assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending"); if (src->is_single_fpu() || src->is_double_fpu()) { if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } } -#endif // !AARCH64 if (src->is_single_cpu()) { switch (type) { @@ -613,9 +506,7 @@ } } else if (src->is_double_cpu()) { __ str(src->as_register_lo(), addr); -#ifndef AARCH64 __ str(src->as_register_hi(), frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); -#endif // !AARCH64 } else if (src->is_single_fpu()) { __ str_float(src->as_float_reg(), addr); } else if (src->is_double_fpu()) { @@ -636,15 +527,7 @@ PatchingStub* patch = NULL; if (needs_patching) { -#ifdef AARCH64 - // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned. - __ align(wordSize); -#endif patch = new PatchingStub(_masm, PatchingStub::access_field_id); -#ifdef AARCH64 - // Extra nop for MT safe patching - __ nop(); -#endif // AARCH64 } int null_check_offset = code_offset(); @@ -653,24 +536,13 @@ case T_ARRAY: case T_OBJECT: if (UseCompressedOops && !wide) { -#ifdef AARCH64 - const Register temp_src = Rtemp; - assert_different_registers(temp_src, src->as_register()); - __ encode_heap_oop(temp_src, src->as_register()); - null_check_offset = code_offset(); - __ str_32(temp_src, as_Address(to_addr)); -#else ShouldNotReachHere(); -#endif // AARCH64 } else { __ str(src->as_register(), as_Address(to_addr)); } break; case T_ADDRESS: -#ifdef AARCH64 - case T_LONG: -#endif // AARCH64 __ str(src->as_pointer_register(), as_Address(to_addr)); break; @@ -691,17 +563,6 @@ __ str_32(src->as_register(), as_Address(to_addr)); break; -#ifdef AARCH64 - - case T_FLOAT: - __ str_s(src->as_float_reg(), as_Address(to_addr)); - break; - - case T_DOUBLE: - __ str_d(src->as_double_reg(), as_Address(to_addr)); - break; - -#else // AARCH64 #ifdef __SOFTFP__ case T_DOUBLE: @@ -765,7 +626,6 @@ break; #endif // __SOFTFP__ -#endif // AARCH64 default: ShouldNotReachHere(); @@ -793,12 +653,10 @@ frame_map()->address_for_slot(src->single_stack_ix()) : frame_map()->address_for_slot(src->double_stack_ix()); -#ifndef AARCH64 assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending"); if (dest->is_single_fpu() || dest->is_double_fpu()) { if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } } -#endif // !AARCH64 if (dest->is_single_cpu()) { switch (type) { @@ -816,9 +674,7 @@ } } else if (dest->is_double_cpu()) { __ ldr(dest->as_register_lo(), addr); -#ifndef AARCH64 __ ldr(dest->as_register_hi(), frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); -#endif // !AARCH64 } else if (dest->is_single_fpu()) { __ ldr_float(dest->as_float_reg(), addr); } else if (dest->is_double_fpu()) { @@ -853,12 +709,8 @@ assert(src->is_double_stack(), "must be"); __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes)); __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes)); -#ifdef AARCH64 - assert(lo_word_offset_in_bytes == 0, "adjust this code"); -#else __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); __ str(Rtemp, 
frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); -#endif // AARCH64 } } @@ -875,10 +727,6 @@ PatchingStub* patch = NULL; if (patch_code != lir_patch_none) { patch = new PatchingStub(_masm, PatchingStub::access_field_id); -#ifdef AARCH64 - // Extra nop for MT safe patching - __ nop(); -#endif // AARCH64 } if (info != NULL) { add_debug_info_for_null_check_here(info); @@ -902,14 +750,10 @@ } break; -#ifdef AARCH64 - case T_LONG: -#else case T_INT: #ifdef __SOFTFP__ case T_FLOAT: #endif // __SOFTFP__ -#endif // AARCH64 __ ldr(dest->as_pointer_register(), as_Address(addr)); break; @@ -929,21 +773,6 @@ __ ldrsh(dest->as_register(), as_Address(addr)); break; -#ifdef AARCH64 - - case T_INT: - __ ldr_w(dest->as_register(), as_Address(addr)); - break; - - case T_FLOAT: - __ ldr_s(dest->as_float_reg(), as_Address(addr)); - break; - - case T_DOUBLE: - __ ldr_d(dest->as_double_reg(), as_Address(addr)); - break; - -#else // AARCH64 #ifdef __SOFTFP__ case T_DOUBLE: @@ -1007,7 +836,6 @@ break; #endif // __SOFTFP__ -#endif // AARCH64 default: ShouldNotReachHere(); @@ -1021,23 +849,6 @@ patching_epilog(patch, patch_code, base_reg, info); } -#ifdef AARCH64 - switch (type) { - case T_ARRAY: - case T_OBJECT: - if (UseCompressedOops && !wide) { - __ decode_heap_oop(dest->as_register()); - } - __ verify_oop(dest->as_register()); - break; - - case T_ADDRESS: - if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { - __ decode_klass_not_null(dest->as_register()); - } - break; - } -#endif // AARCH64 } @@ -1064,48 +875,13 @@ // x/0x80000000 is a special case, since dividend is a power of two, but is negative. // The only possible result values are 0 and 1, with 1 only for dividend == divisor == 0x80000000. __ cmp_32(left, c); -#ifdef AARCH64 - __ cset(dest, eq); -#else __ mov(dest, 0, ne); __ mov(dest, 1, eq); -#endif // AARCH64 } } else { -#ifdef AARCH64 - Register left = op->in_opr1()->as_pointer_register(); - Register right = op->in_opr2()->as_pointer_register(); - Register dest = op->result_opr()->as_pointer_register(); - - switch (op->code()) { - case lir_idiv: - if (is_32) { - __ sdiv_w(dest, left, right); - } else { - __ sdiv(dest, left, right); - } - break; - case lir_irem: { - Register tmp = op->in_opr3()->as_pointer_register(); - assert_different_registers(left, tmp); - assert_different_registers(right, tmp); - if (is_32) { - __ sdiv_w(tmp, left, right); - __ msub_w(dest, right, tmp, left); - } else { - __ sdiv(tmp, left, right); - __ msub(dest, right, tmp, left); - } - break; - } - default: - ShouldNotReachHere(); - } -#else assert(op->code() == lir_idiv || op->code() == lir_irem, "unexpected op3"); __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type); add_debug_info_for_div0_here(op->info()); -#endif // AARCH64 } } @@ -1122,9 +898,7 @@ assert (op->code() != lir_cond_float_branch, "this should be impossible"); #else if (op->code() == lir_cond_float_branch) { -#ifndef AARCH64 __ fmstat(); -#endif // !AARCH64 __ b(*(op->ublock()->label()), vs); } #endif // __SOFTFP__ @@ -1151,12 +925,8 @@ switch (op->bytecode()) { case Bytecodes::_i2l: -#ifdef AARCH64 - __ sign_extend(dest->as_register_lo(), src->as_register(), 32); -#else move_regs(src->as_register(), dest->as_register_lo()); __ mov(dest->as_register_hi(), AsmOperand(src->as_register(), asr, 31)); -#endif // AARCH64 break; case Bytecodes::_l2i: move_regs(src->as_register_lo(), dest->as_register()); @@ -1177,51 +947,21 @@ __ convert_d2f(dest->as_float_reg(), 
src->as_double_reg()); break; case Bytecodes::_i2f: -#ifdef AARCH64 - __ scvtf_sw(dest->as_float_reg(), src->as_register()); -#else __ fmsr(Stemp, src->as_register()); __ fsitos(dest->as_float_reg(), Stemp); -#endif // AARCH64 break; case Bytecodes::_i2d: -#ifdef AARCH64 - __ scvtf_dw(dest->as_double_reg(), src->as_register()); -#else __ fmsr(Stemp, src->as_register()); __ fsitod(dest->as_double_reg(), Stemp); -#endif // AARCH64 break; case Bytecodes::_f2i: -#ifdef AARCH64 - __ fcvtzs_ws(dest->as_register(), src->as_float_reg()); -#else __ ftosizs(Stemp, src->as_float_reg()); __ fmrs(dest->as_register(), Stemp); -#endif // AARCH64 break; case Bytecodes::_d2i: -#ifdef AARCH64 - __ fcvtzs_wd(dest->as_register(), src->as_double_reg()); -#else __ ftosizd(Stemp, src->as_double_reg()); __ fmrs(dest->as_register(), Stemp); -#endif // AARCH64 - break; -#ifdef AARCH64 - case Bytecodes::_l2f: - __ scvtf_sx(dest->as_float_reg(), src->as_register_lo()); - break; - case Bytecodes::_l2d: - __ scvtf_dx(dest->as_double_reg(), src->as_register_lo()); - break; - case Bytecodes::_f2l: - __ fcvtzs_xs(dest->as_register_lo(), src->as_float_reg()); break; - case Bytecodes::_d2l: - __ fcvtzs_xd(dest->as_register_lo(), src->as_double_reg()); - break; -#endif // AARCH64 default: ShouldNotReachHere(); } @@ -1327,11 +1067,7 @@ assert_different_registers(obj, mdo, data_val); setup_md_access(method, bci, md, data, mdo_offset_bias); Label not_null; -#ifdef AARCH64 - __ cbnz(obj, not_null); -#else __ b(not_null, ne); -#endif // AARCH64 __ mov_metadata(mdo, md->constant_encoding()); if (mdo_offset_bias > 0) { __ mov_slow(data_val, mdo_offset_bias); @@ -1373,13 +1109,9 @@ __ b(*failure); } -// Sets `res` to true, if `cond` holds. On AArch64 also sets `res` to false if `cond` does not hold. +// Sets `res` to true, if `cond` holds. static void set_instanceof_result(MacroAssembler* _masm, Register res, AsmCondition cond) { -#ifdef AARCH64 - __ cset(res, cond); -#else __ mov(res, 1, cond); -#endif // AARCH64 } @@ -1406,9 +1138,7 @@ Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry(); if (op->should_profile()) { -#ifndef AARCH64 __ cmp(value, 0); -#endif // !AARCH64 typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, value, k_RInfo, Rtemp, &done); } else { __ cbz(value, done); @@ -1470,57 +1200,6 @@ Label *failure_target = op->should_profile() ? &profile_cast_failure : op->stub()->entry(); Label *success_target = op->should_profile() ? 
&profile_cast_success : &done; -#ifdef AARCH64 - move_regs(obj, res); - if (op->should_profile()) { - typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done); - } else { - __ cbz(obj, done); - } - if (k->is_loaded()) { - __ mov_metadata(k_RInfo, k->constant_encoding()); - } else { - if (res != obj) { - op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res)); - } - klass2reg_with_patching(k_RInfo, op->info_for_patch()); - } - __ load_klass(klass_RInfo, res); - - if (op->fast_check()) { - __ cmp(klass_RInfo, k_RInfo); - __ b(*failure_target, ne); - } else if (k->is_loaded()) { - __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset())); - if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) { - __ cmp(Rtemp, k_RInfo); - __ b(*failure_target, ne); - } else { - __ cmp(klass_RInfo, k_RInfo); - __ cond_cmp(Rtemp, k_RInfo, ne); - __ b(*success_target, eq); - assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); - __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); - __ cbz(R0, *failure_target); - } - } else { - __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); - // check for immediate positive hit - __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); - __ cmp(klass_RInfo, k_RInfo); - __ cond_cmp(Rtemp, k_RInfo, ne); - __ b(*success_target, eq); - // check for immediate negative hit - __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); - __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset())); - __ b(*failure_target, ne); - // slow case - assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); - __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); - __ cbz(R0, *failure_target); - } - -#else // AARCH64 __ movs(res, obj); if (op->should_profile()) { @@ -1575,7 +1254,6 @@ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); __ cbz(R0, *failure_target); } -#endif // AARCH64 if (op->should_profile()) { Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp; @@ -1605,20 +1283,12 @@ Label *failure_target = op->should_profile() ? &profile_cast_failure : &done; Label *success_target = op->should_profile() ? 
&profile_cast_success : &done; -#ifdef AARCH64 - move_regs(obj, res); -#else __ movs(res, obj); -#endif // AARCH64 if (op->should_profile()) { typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done); } else { -#ifdef AARCH64 - __ cbz(obj, done); // If obj == NULL, res is false -#else __ b(done, eq); -#endif // AARCH64 } if (k->is_loaded()) { @@ -1629,11 +1299,9 @@ } __ load_klass(klass_RInfo, res); -#ifndef AARCH64 if (!op->should_profile()) { __ mov(res, 0); } -#endif // !AARCH64 if (op->fast_check()) { __ cmp(klass_RInfo, k_RInfo); @@ -1671,21 +1339,11 @@ // check for immediate positive hit __ cmp(klass_RInfo, k_RInfo); if (!op->should_profile()) { -#ifdef AARCH64 - // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp - __ ldr(res, Address(klass_RInfo, Rtemp)); -#else __ ldr(res, Address(klass_RInfo, Rtemp), ne); -#endif // AARCH64 __ cond_cmp(res, k_RInfo, ne); set_instanceof_result(_masm, res, eq); } else { -#ifdef AARCH64 - // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp - __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); -#else __ ldr(Rtemp, Address(klass_RInfo, Rtemp), ne); -#endif // AARCH64 __ cond_cmp(Rtemp, k_RInfo, ne); } __ b(*success_target, eq); @@ -1695,11 +1353,7 @@ } __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset())); if (!op->should_profile()) { -#ifdef AARCH64 - __ mov(res, 0); -#else __ mov(res, 0, ne); -#endif // AARCH64 } __ b(*failure_target, ne); // slow case @@ -1741,41 +1395,6 @@ // } else { // dest = 0; // } -#ifdef AARCH64 - Label retry, done; - Register addr = op->addr()->as_pointer_register(); - Register cmpval = op->cmp_value()->as_pointer_register(); - Register newval = op->new_value()->as_pointer_register(); - Register dest = op->result_opr()->as_pointer_register(); - assert_different_registers(dest, addr, cmpval, newval, Rtemp); - - if (UseCompressedOops && op->code() == lir_cas_obj) { - Register tmp1 = op->tmp1()->as_pointer_register(); - Register tmp2 = op->tmp2()->as_pointer_register(); - assert_different_registers(dest, addr, cmpval, newval, tmp1, tmp2, Rtemp); - __ encode_heap_oop(tmp1, cmpval); cmpval = tmp1; - __ encode_heap_oop(tmp2, newval); newval = tmp2; - } - - __ mov(dest, ZR); - __ bind(retry); - if (((op->code() == lir_cas_obj) && !UseCompressedOops) || op->code() == lir_cas_long) { - __ ldaxr(Rtemp, addr); - __ cmp(Rtemp, cmpval); - __ b(done, ne); - __ stlxr(Rtemp, newval, addr); - } else if (((op->code() == lir_cas_obj) && UseCompressedOops) || op->code() == lir_cas_int) { - __ ldaxr_w(Rtemp, addr); - __ cmp_w(Rtemp, cmpval); - __ b(done, ne); - __ stlxr_w(Rtemp, newval, addr); - } else { - ShouldNotReachHere(); - } - __ cbnz_w(Rtemp, retry); - __ mov(dest, 1); - __ bind(done); -#else // FIXME: membar_release __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); Register addr = op->addr()->is_register() ? @@ -1812,7 +1431,6 @@ } else { Unimplemented(); } -#endif // AARCH64 // FIXME: is full membar really needed instead of just membar_acquire? 
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); } @@ -1835,36 +1453,6 @@ } } -#ifdef AARCH64 - - // TODO-AARCH64 implement it more efficiently - - if (opr1->is_register()) { - reg2reg(opr1, result); - } else if (opr1->is_stack()) { - stack2reg(opr1, result, result->type()); - } else if (opr1->is_constant()) { - const2reg(opr1, result, lir_patch_none, NULL); - } else { - ShouldNotReachHere(); - } - - Label skip; - __ b(skip, acond); - - if (opr2->is_register()) { - reg2reg(opr2, result); - } else if (opr2->is_stack()) { - stack2reg(opr2, result, result->type()); - } else if (opr2->is_constant()) { - const2reg(opr2, result, lir_patch_none, NULL); - } else { - ShouldNotReachHere(); - } - - __ bind(skip); - -#else for (;;) { // two iterations only if (opr1 == result) { // do nothing @@ -1924,10 +1512,9 @@ opr1 = opr2; acond = ncond; } -#endif // AARCH64 } -#if defined(AARCH64) || defined(ASSERT) +#if defined(ASSERT) static int reg_size(LIR_Opr op) { switch (op->type()) { case T_FLOAT: @@ -1959,37 +1546,6 @@ int scale = addr->scale(); AsmShift shift = lsl; -#ifdef AARCH64 - bool is_index_extended = reg_size(addr->base()) > reg_size(addr->index()); - if (scale < 0) { - scale = -scale; - shift = lsr; - } - assert(shift == lsl || !is_index_extended, "could not have extend and right shift in one operand"); - assert(0 <= scale && scale <= 63, "scale is too large"); - - if (is_index_extended) { - assert(scale <= 4, "scale is too large for add with extended register"); - assert(addr->index()->is_single_cpu(), "should be"); - assert(addr->index()->type() == T_INT, "should be"); - assert(dest->is_double_cpu(), "should be"); - assert(code == lir_add, "special case of add with extended register"); - - __ add(res, lreg, addr->index()->as_register(), ex_sxtw, scale); - return; - } else if (reg_size(dest) == BytesPerInt) { - assert(reg_size(addr->base()) == reg_size(addr->index()), "should be"); - assert(reg_size(addr->base()) == reg_size(dest), "should be"); - - AsmOperand operand(addr->index()->as_pointer_register(), shift, scale); - switch (code) { - case lir_add: __ add_32(res, lreg, operand); break; - case lir_sub: __ sub_32(res, lreg, operand); break; - default: ShouldNotReachHere(); - } - return; - } -#endif // AARCH64 assert(reg_size(addr->base()) == reg_size(addr->index()), "should be"); assert(reg_size(addr->base()) == reg_size(dest), "should be"); @@ -2002,7 +1558,6 @@ default: ShouldNotReachHere(); } -#ifndef AARCH64 } else if (left->is_address()) { assert(code == lir_sub && right->is_single_cpu(), "special case used by strength_reduce_multiply()"); const LIR_Address* addr = left->as_address_ptr(); @@ -2010,15 +1565,9 @@ const Register rreg = right->as_register(); assert(addr->base()->as_register() == rreg && addr->index()->is_register() && addr->disp() == 0, "must be"); __ rsb(res, rreg, AsmOperand(addr->index()->as_register(), lsl, addr->scale())); -#endif // !AARCH64 } else if (dest->is_single_cpu()) { assert(left->is_single_cpu(), "unexpected left operand"); -#ifdef AARCH64 - assert(dest->type() == T_INT, "unexpected dest type"); - assert(left->type() == T_INT, "unexpected left type"); - assert(right->type() == T_INT, "unexpected right type"); -#endif // AARCH64 const Register res = dest->as_register(); const Register lreg = left->as_register(); @@ -2045,36 +1594,6 @@ } } else if (dest->is_double_cpu()) { -#ifdef AARCH64 - assert(left->is_double_cpu() || - (left->is_single_cpu() && ((left->type() == T_OBJECT) || (left->type() == 
T_ARRAY) || (left->type() == T_ADDRESS))), - "unexpected left operand"); - - const Register res = dest->as_register_lo(); - const Register lreg = left->as_pointer_register(); - - if (right->is_constant()) { - assert(right->type() == T_LONG, "unexpected right type"); - assert((right->as_constant_ptr()->as_jlong() >> 24) == 0, "out of range"); - jint imm = (jint)right->as_constant_ptr()->as_jlong(); - switch (code) { - case lir_add: __ add(res, lreg, imm); break; - case lir_sub: __ sub(res, lreg, imm); break; - default: ShouldNotReachHere(); - } - } else { - assert(right->is_double_cpu() || - (right->is_single_cpu() && ((right->type() == T_OBJECT) || (right->type() == T_ARRAY) || (right->type() == T_ADDRESS))), - "unexpected right operand"); - const Register rreg = right->as_pointer_register(); - switch (code) { - case lir_add: __ add(res, lreg, rreg); break; - case lir_sub: __ sub(res, lreg, rreg); break; - case lir_mul: __ mul(res, lreg, rreg); break; - default: ShouldNotReachHere(); - } - } -#else // AARCH64 Register res_lo = dest->as_register_lo(); Register res_hi = dest->as_register_hi(); Register lreg_lo = left->as_register_lo(); @@ -2118,7 +1637,6 @@ } } move_regs(res_lo, dest->as_register_lo()); -#endif // AARCH64 } else if (dest->is_single_fpu()) { assert(left->is_single_fpu(), "must be"); @@ -2175,11 +1693,6 @@ assert(left->is_register(), "wrong items state"); if (dest->is_single_cpu()) { -#ifdef AARCH64 - assert (dest->type() == T_INT, "unexpected result type"); - assert (left->type() == T_INT, "unexpected left type"); - assert (right->type() == T_INT, "unexpected right type"); -#endif // AARCH64 const Register res = dest->as_register(); const Register lreg = left->as_register(); @@ -2206,10 +1719,6 @@ assert(dest->is_double_cpu(), "should be"); Register res_lo = dest->as_register_lo(); -#ifdef AARCH64 - assert ((left->is_single_cpu() && left->is_oop_register()) || left->is_double_cpu(), "should be"); - const Register lreg_lo = left->as_pointer_register(); -#else assert (dest->type() == T_LONG, "unexpected result type"); assert (left->type() == T_LONG, "unexpected left type"); assert (right->type() == T_LONG, "unexpected right type"); @@ -2217,19 +1726,8 @@ const Register res_hi = dest->as_register_hi(); const Register lreg_lo = left->as_register_lo(); const Register lreg_hi = left->as_register_hi(); -#endif // AARCH64 if (right->is_register()) { -#ifdef AARCH64 - assert ((right->is_single_cpu() && right->is_oop_register()) || right->is_double_cpu(), "should be"); - const Register rreg_lo = right->as_pointer_register(); - switch (code) { - case lir_logic_and: __ andr(res_lo, lreg_lo, rreg_lo); break; - case lir_logic_or: __ orr (res_lo, lreg_lo, rreg_lo); break; - case lir_logic_xor: __ eor (res_lo, lreg_lo, rreg_lo); break; - default: ShouldNotReachHere(); - } -#else const Register rreg_lo = right->as_register_lo(); const Register rreg_hi = right->as_register_hi(); if (res_lo == lreg_hi || res_lo == rreg_hi) { @@ -2252,23 +1750,8 @@ ShouldNotReachHere(); } move_regs(res_lo, dest->as_register_lo()); -#endif // AARCH64 } else { assert(right->is_constant(), "must be"); -#ifdef AARCH64 - const julong c = (julong)right->as_constant_ptr()->as_jlong(); - Assembler::LogicalImmediate imm(c, false); - if (imm.is_encoded()) { - switch (code) { - case lir_logic_and: __ andr(res_lo, lreg_lo, imm); break; - case lir_logic_or: __ orr (res_lo, lreg_lo, imm); break; - case lir_logic_xor: __ eor (res_lo, lreg_lo, imm); break; - default: ShouldNotReachHere(); - } - } else { - BAILOUT("64 bit 
constant cannot be inlined"); - } -#else const jint c_lo = (jint) right->as_constant_ptr()->as_jlong(); const jint c_hi = (jint) (right->as_constant_ptr()->as_jlong() >> 32); // Case for logic_or from do_ClassIDIntrinsic() @@ -2303,36 +1786,11 @@ } else { BAILOUT("64 bit constant cannot be inlined"); } -#endif // AARCH64 } } } -#ifdef AARCH64 - -void LIR_Assembler::long_compare_helper(LIR_Opr opr1, LIR_Opr opr2) { - assert(opr1->is_double_cpu(), "should be"); - Register x = opr1->as_register_lo(); - - if (opr2->is_double_cpu()) { - Register y = opr2->as_register_lo(); - __ cmp(x, y); - - } else { - assert(opr2->is_constant(), "should be"); - assert(opr2->as_constant_ptr()->type() == T_LONG, "long constant expected"); - jlong c = opr2->as_jlong(); - assert(((c >> 31) == 0) || ((c >> 31) == -1), "immediate is out of range"); - if (c >= 0) { - __ cmp(x, (jint)c); - } else { - __ cmn(x, (jint)(-c)); - } - } -} - -#endif // AARCH64 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { if (opr1->is_single_cpu()) { @@ -2370,9 +1828,6 @@ ShouldNotReachHere(); } } else if (opr1->is_double_cpu()) { -#ifdef AARCH64 - long_compare_helper(opr1, opr2); -#else Register xlo = opr1->as_register_lo(); Register xhi = opr1->as_register_hi(); if (opr2->is_constant() && opr2->as_jlong() == 0) { @@ -2391,7 +1846,6 @@ } else { ShouldNotReachHere(); } -#endif // AARCH64 } else if (opr1->is_single_fpu()) { if (opr2->is_constant()) { assert(opr2->as_jfloat() == 0.0f, "cannot handle otherwise"); @@ -2415,15 +1869,6 @@ const Register res = dst->as_register(); if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { comp_op(lir_cond_unknown, left, right, op); -#ifdef AARCH64 - if (code == lir_ucmp_fd2i) { // unordered is less - __ cset(res, gt); // 1 if '>', else 0 - __ csinv(res, res, ZR, ge); // previous value if '>=', else -1 - } else { - __ cset(res, hi); // 1 if '>' or unordered, else 0 - __ csinv(res, res, ZR, pl); // previous value if '>=' or unordered, else -1 - } -#else __ fmstat(); if (code == lir_ucmp_fd2i) { // unordered is less __ mvn(res, 0, lt); @@ -2433,17 +1878,10 @@ __ mvn(res, 0, cc); } __ mov(res, 0, eq); -#endif // AARCH64 } else { assert(code == lir_cmp_l2i, "must be"); -#ifdef AARCH64 - long_compare_helper(left, right); - - __ cset(res, gt); // 1 if '>', else 0 - __ csinv(res, res, ZR, ge); // previous value if '>=', else -1 -#else Label done; const Register xlo = left->as_register_lo(); const Register xhi = left->as_register_hi(); @@ -2457,7 +1895,6 @@ __ mov(res, 1, hi); __ mvn(res, 0, lo); __ bind(done); -#endif // AARCH64 } } @@ -2478,19 +1915,15 @@ bool near_range = __ cache_fully_reachable(); address oop_address = pc(); - bool use_movw = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); + bool use_movw = VM_Version::supports_movw(); // Ricklass may contain something that is not a metadata pointer so // mov_metadata can't be used InlinedAddress value((address)Universe::non_oop_word()); InlinedAddress addr(op->addr()); if (use_movw) { -#ifdef AARCH64 - ShouldNotReachHere(); -#else __ movw(Ricklass, ((unsigned int)Universe::non_oop_word()) & 0xffff); __ movt(Ricklass, ((unsigned int)Universe::non_oop_word()) >> 16); -#endif // AARCH64 } else { // No movw/movt, must be load a pc relative value but no // relocation so no metadata table to load from. 
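The hunk above keeps only the ARM32 path for loading the 32-bit placeholder (Universe::non_oop_word()) into Ricklass: a movw/movt pair when VM_Version::supports_movw() is true, otherwise a PC-relative literal load. As a minimal standalone sketch of the halfword split that the movw/movt pair relies on — the sample value, register placeholder and program are illustrative only, not part of the patch:

#include <cstdint>
#include <cstdio>

int main() {
  // Stand-in for the 32-bit placeholder value loaded above (hypothetical).
  uint32_t value = 0xcafebabeu;
  uint16_t lo = (uint16_t)(value & 0xffffu);  // halfword encoded by movw (zero-extends the register)
  uint16_t hi = (uint16_t)(value >> 16);      // halfword encoded by movt (preserves the low half)
  printf("movw r?, #0x%04x\n", lo);
  printf("movt r?, #0x%04x\n", hi);
  return 0;
}

On pre-ARMv7 cores without movw/movt, the same constant has to come from a nearby literal pool via a PC-relative load, which is why the fallback branch above keeps the load-from-pc sequence.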
@@ -2582,35 +2015,6 @@ } void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { -#ifdef AARCH64 - if (dest->is_single_cpu()) { - Register res = dest->as_register(); - Register x = left->as_register(); - Register y = count->as_register(); - assert (dest->type() == T_INT, "unexpected result type"); - assert (left->type() == T_INT, "unexpected left type"); - - switch (code) { - case lir_shl: __ lslv_w(res, x, y); break; - case lir_shr: __ asrv_w(res, x, y); break; - case lir_ushr: __ lsrv_w(res, x, y); break; - default: ShouldNotReachHere(); - } - } else if (dest->is_double_cpu()) { - Register res = dest->as_register_lo(); - Register x = left->as_register_lo(); - Register y = count->as_register(); - - switch (code) { - case lir_shl: __ lslv(res, x, y); break; - case lir_shr: __ asrv(res, x, y); break; - case lir_ushr: __ lsrv(res, x, y); break; - default: ShouldNotReachHere(); - } - } else { - ShouldNotReachHere(); - } -#else AsmShift shift = lsl; switch (code) { case lir_shl: shift = lsl; break; @@ -2645,43 +2049,10 @@ } else { ShouldNotReachHere(); } -#endif // AARCH64 } void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { -#ifdef AARCH64 - if (dest->is_single_cpu()) { - assert (dest->type() == T_INT, "unexpected result type"); - assert (left->type() == T_INT, "unexpected left type"); - count &= 31; - if (count != 0) { - switch (code) { - case lir_shl: __ _lsl_w(dest->as_register(), left->as_register(), count); break; - case lir_shr: __ _asr_w(dest->as_register(), left->as_register(), count); break; - case lir_ushr: __ _lsr_w(dest->as_register(), left->as_register(), count); break; - default: ShouldNotReachHere(); - } - } else { - move_regs(left->as_register(), dest->as_register()); - } - } else if (dest->is_double_cpu()) { - count &= 63; - if (count != 0) { - switch (code) { - case lir_shl: __ _lsl(dest->as_register_lo(), left->as_register_lo(), count); break; - case lir_shr: __ _asr(dest->as_register_lo(), left->as_register_lo(), count); break; - case lir_ushr: __ _lsr(dest->as_register_lo(), left->as_register_lo(), count); break; - default: ShouldNotReachHere(); - } - } else { - move_regs(left->as_register_lo(), dest->as_register_lo()); - } - } else { - ShouldNotReachHere(); - } - -#else AsmShift shift = lsl; switch (code) { case lir_shl: shift = lsl; break; @@ -2720,29 +2091,18 @@ } else { ShouldNotReachHere(); } -#endif // AARCH64 } // Saves 4 given registers in reserved argument area. void LIR_Assembler::save_in_reserved_area(Register r1, Register r2, Register r3, Register r4) { verify_reserved_argument_area_size(4); -#ifdef AARCH64 - __ stp(r1, r2, Address(SP, 0)); - __ stp(r3, r4, Address(SP, 2*wordSize)); -#else __ stmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4)); -#endif // AARCH64 } // Restores 4 given registers from reserved argument area. 
void LIR_Assembler::restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4) { -#ifdef AARCH64 - __ ldp(r1, r2, Address(SP, 0)); - __ ldp(r3, r4, Address(SP, 2*wordSize)); -#else __ ldmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4), no_writeback); -#endif // AARCH64 } @@ -2757,9 +2117,6 @@ Register tmp2 = Rtemp; assert(src == R0 && src_pos == R1 && dst == R2 && dst_pos == R3, "code assumption"); -#ifdef AARCH64 - assert(length == R4, "code assumption"); -#endif // AARCH64 CodeStub* stub = op->stub(); @@ -2773,13 +2130,8 @@ // save arguments, because they will be killed by a runtime call save_in_reserved_area(R0, R1, R2, R3); -#ifdef AARCH64 - // save length argument, will be killed by a runtime call - __ raw_push(length, ZR); -#else // pass length argument on SP[0] __ str(length, Address(SP, -2*wordSize, pre_indexed)); // 2 words for a proper stack alignment -#endif // AARCH64 address copyfunc_addr = StubRoutines::generic_arraycopy(); assert(copyfunc_addr != NULL, "generic arraycopy stub required"); @@ -2791,11 +2143,7 @@ // the stub is in the code cache so close enough __ call(copyfunc_addr, relocInfo::runtime_call_type); -#ifdef AARCH64 - __ raw_pop(length, ZR); -#else __ add(SP, SP, 2*wordSize); -#endif // AARCH64 __ cbz_32(R0, *stub->continuation()); @@ -2969,7 +2317,7 @@ Register dst_ptr = R1; Register len = R2; Register chk_off = R3; - Register super_k = AARCH64_ONLY(R4) NOT_AARCH64(tmp); + Register super_k = tmp; __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type)); __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift); @@ -2981,20 +2329,11 @@ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); int sco_offset = in_bytes(Klass::super_check_offset_offset()); -#ifdef AARCH64 - __ raw_push(length, ZR); // Preserve length around *copyfunc_addr call - - __ mov(len, length); - __ ldr(super_k, Address(tmp, ek_offset)); // super_k == R4 == length, so this load cannot be performed earlier - // TODO-AARCH64: check whether it is faster to load super klass early by using tmp and additional mov. 
- __ ldr_u32(chk_off, Address(super_k, sco_offset)); -#else // AARCH64 __ ldr(super_k, Address(tmp, ek_offset)); __ mov(len, length); __ ldr_u32(chk_off, Address(super_k, sco_offset)); __ push(super_k); -#endif // AARCH64 __ call(copyfunc_addr, relocInfo::runtime_call_type); @@ -3007,11 +2346,7 @@ } #endif // PRODUCT -#ifdef AARCH64 - __ raw_pop(length, ZR); -#else __ add(SP, SP, wordSize); // Drop super_k argument -#endif // AARCH64 __ cbz_32(R0, *stub->continuation()); __ mvn_32(tmp, R0); @@ -3073,9 +2408,6 @@ void LIR_Assembler::emit_assert(LIR_OpAssert* op) { assert(op->code() == lir_assert, "must be"); -#ifdef AARCH64 - __ NOT_IMPLEMENTED(); -#else if (op->in_opr1()->is_valid()) { assert(op->in_opr2()->is_valid(), "both operands must be valid"); comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); @@ -3107,7 +2439,6 @@ breakpoint(); } __ bind(ok); -#endif // AARCH64 } #endif // ASSERT @@ -3156,7 +2487,7 @@ assert_different_registers(mdo, tmp1); __ mov_metadata(mdo, md->constant_encoding()); int mdo_offset_bias = 0; - int max_offset = AARCH64_ONLY(4096 << LogBytesPerWord) NOT_AARCH64(4096); + int max_offset = 4096; if (md->byte_offset_of_slot(data, CounterData::count_offset()) + data->size_in_bytes() >= max_offset) { // The offset is large so bias the mdo by the base of the slot so // that the ldr can use an immediate offset to reference the slots of the data @@ -3252,7 +2583,6 @@ void LIR_Assembler::align_backward_branch_target() { - // TODO-AARCH64 review it // Some ARM processors do better with 8-byte branch target alignment __ align(8); } @@ -3265,9 +2595,6 @@ assert (left->type() == T_INT, "unexpected left type"); __ neg_32(dest->as_register(), left->as_register()); } else if (left->is_double_cpu()) { -#ifdef AARCH64 - __ neg(dest->as_register_lo(), left->as_register_lo()); -#else Register dest_lo = dest->as_register_lo(); Register dest_hi = dest->as_register_hi(); Register src_lo = left->as_register_lo(); @@ -3278,7 +2605,6 @@ __ rsbs(dest_lo, src_lo, 0); __ rsc(dest_hi, src_hi, 0); move_regs(dest_lo, dest->as_register_lo()); -#endif // AARCH64 } else if (left->is_single_fpu()) { __ neg_float(dest->as_float_reg(), left->as_float_reg()); } else if (left->is_double_fpu()) { @@ -3300,9 +2626,6 @@ __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(), c); } else { assert(addr->disp() == 0, "cannot handle otherwise"); -#ifdef AARCH64 - assert(addr->index()->is_double_cpu(), "should be"); -#endif // AARCH64 __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(), AsmOperand(addr->index()->as_pointer_register(), lsl, addr->scale())); } @@ -3319,9 +2642,6 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { -#ifdef AARCH64 - Unimplemented(); // TODO-AARCH64: Use stlr/ldar instructions for volatile load/store -#else assert(src->is_double_cpu() && dest->is_address() || src->is_address() && dest->is_double_cpu(), "Simple move_op is called for all other cases"); @@ -3363,7 +2683,6 @@ if (info != NULL) { add_debug_info_for_null_check(null_check_offset, info); } -#endif // AARCH64 } @@ -3405,9 +2724,6 @@ } void LIR_Assembler::peephole(LIR_List* lir) { -#ifdef AARCH64 - return; // TODO-AARCH64 implement peephole optimizations -#endif LIR_OpList* inst = lir->instructions_list(); const int inst_length = inst->length(); for (int i = 0; i < inst_length; i++) { @@ -3471,38 +2787,23 @@ } void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { -#ifdef AARCH64 - 
Register ptr = src->as_pointer_register(); -#else assert(src->is_address(), "sanity"); Address addr = as_Address(src->as_address_ptr()); -#endif if (code == lir_xchg) { -#ifdef AARCH64 - if (UseCompressedOops && data->is_oop()) { - __ encode_heap_oop(tmp->as_pointer_register(), data->as_register()); - } -#endif // AARCH64 } else { assert (!data->is_oop(), "xadd for oops"); } -#ifndef AARCH64 __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); -#endif // !AARCH64 Label retry; __ bind(retry); - if ((data->type() == T_INT) || (data->is_oop() AARCH64_ONLY(&& UseCompressedOops))) { + if (data->type() == T_INT || data->is_oop()) { Register dst = dest->as_register(); Register new_val = noreg; -#ifdef AARCH64 - __ ldaxr_w(dst, ptr); -#else __ ldrex(dst, addr); -#endif if (code == lir_xadd) { Register tmp_reg = tmp->as_register(); if (data->is_constant()) { @@ -3521,35 +2822,8 @@ } assert_different_registers(dst, new_val); } -#ifdef AARCH64 - __ stlxr_w(Rtemp, new_val, ptr); -#else __ strex(Rtemp, new_val, addr); -#endif // AARCH64 -#ifdef AARCH64 - } else if ((data->type() == T_LONG) || (data->is_oop() && !UseCompressedOops)) { - Register dst = dest->as_pointer_register(); - Register new_val = noreg; - __ ldaxr(dst, ptr); - if (code == lir_xadd) { - Register tmp_reg = tmp->as_pointer_register(); - if (data->is_constant()) { - assert_different_registers(dst, ptr, tmp_reg); - jlong c = data->as_constant_ptr()->as_jlong(); - assert((jlong)((jint)c) == c, "overflow"); - __ add(tmp_reg, dst, (jint)c); - } else { - assert_different_registers(dst, ptr, tmp_reg, data->as_pointer_register()); - __ add(tmp_reg, dst, data->as_pointer_register()); - } - new_val = tmp_reg; - } else { - new_val = data->as_pointer_register(); - assert_different_registers(dst, ptr, new_val); - } - __ stlxr(Rtemp, new_val, ptr); -#else } else if (data->type() == T_LONG) { Register dst_lo = dest->as_register_lo(); Register new_val_lo = noreg; @@ -3590,7 +2864,6 @@ assert((new_val_lo->encoding() & 0x1) == 0, "misaligned register pair"); } __ strexd(Rtemp, new_val_lo, addr); -#endif // AARCH64 } else { ShouldNotReachHere(); } @@ -3598,11 +2871,6 @@ __ cbnz_32(Rtemp, retry); __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); -#ifdef AARCH64 - if (UseCompressedOops && data->is_oop()) { - __ decode_heap_oop(dest->as_register()); - } -#endif // AARCH64 } #undef __ --- old/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp 2018-09-17 10:29:40.696484170 -0400 +++ new/src/hotspot/cpu/arm/c1_LIRAssembler_arm.hpp 2018-09-17 10:29:40.067447430 -0400 @@ -44,9 +44,6 @@ Label* profile_cast_success, Label* profile_cast_failure, Label* success, Label* failure); -#ifdef AARCH64 - void long_compare_helper(LIR_Opr opr1, LIR_Opr opr2); -#endif // AARCH64 // Saves 4 given registers in reserved argument area. 
void save_in_reserved_area(Register r1, Register r2, Register r3, Register r4); @@ -55,10 +52,10 @@ void restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4); enum { - _call_stub_size = AARCH64_ONLY(32) NOT_AARCH64(16), + _call_stub_size = 16, _call_aot_stub_size = 0, - _exception_handler_size = PRODUCT_ONLY(AARCH64_ONLY(256) NOT_AARCH64(68)) NOT_PRODUCT(AARCH64_ONLY(256+216) NOT_AARCH64(68+60)), - _deopt_handler_size = AARCH64_ONLY(32) NOT_AARCH64(16) + _exception_handler_size = PRODUCT_ONLY(68) NOT_PRODUCT(68+60), + _deopt_handler_size = 16 }; public: --- old/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2018-09-17 10:29:42.256575292 -0400 +++ new/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp 2018-09-17 10:29:41.623538317 -0400 @@ -118,19 +118,6 @@ bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { -#ifdef AARCH64 - if (v->type()->as_IntConstant() != NULL) { - return v->type()->as_IntConstant()->value() == 0; - } else if (v->type()->as_LongConstant() != NULL) { - return v->type()->as_LongConstant()->value() == 0; - } else if (v->type()->as_ObjectConstant() != NULL) { - return v->type()->as_ObjectConstant()->value()->is_null_object(); - } else if (v->type()->as_FloatConstant() != NULL) { - return jint_cast(v->type()->as_FloatConstant()->value()) == 0; - } else if (v->type()->as_DoubleConstant() != NULL) { - return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0; - } -#endif // AARCH64 return false; } @@ -140,15 +127,10 @@ return Assembler::is_arith_imm_in_range(v->type()->as_IntConstant()->value()); } else if (v->type()->as_ObjectConstant() != NULL) { return v->type()->as_ObjectConstant()->value()->is_null_object(); -#ifdef AARCH64 - } else if (v->type()->as_LongConstant() != NULL) { - return Assembler::is_arith_imm_in_range(v->type()->as_LongConstant()->value()); -#else } else if (v->type()->as_FloatConstant() != NULL) { return v->type()->as_FloatConstant()->value() == 0.0f; } else if (v->type()->as_DoubleConstant() != NULL) { return v->type()->as_DoubleConstant()->value() == 0.0; -#endif // AARCH64 } return false; } @@ -160,39 +142,6 @@ } -#ifdef AARCH64 - -static bool can_inline_as_constant_in_cmp(Value v) { - jlong constant; - if (v->type()->as_IntConstant() != NULL) { - constant = v->type()->as_IntConstant()->value(); - } else if (v->type()->as_LongConstant() != NULL) { - constant = v->type()->as_LongConstant()->value(); - } else if (v->type()->as_ObjectConstant() != NULL) { - return v->type()->as_ObjectConstant()->value()->is_null_object(); - } else if (v->type()->as_FloatConstant() != NULL) { - return v->type()->as_FloatConstant()->value() == 0.0f; - } else if (v->type()->as_DoubleConstant() != NULL) { - return v->type()->as_DoubleConstant()->value() == 0.0; - } else { - return false; - } - - return Assembler::is_arith_imm_in_range(constant) || Assembler::is_arith_imm_in_range(-constant); -} - - -static bool can_inline_as_constant_in_logic(Value v) { - if (v->type()->as_IntConstant() != NULL) { - return Assembler::LogicalImmediate(v->type()->as_IntConstant()->value(), true).is_encoded(); - } else if (v->type()->as_LongConstant() != NULL) { - return Assembler::LogicalImmediate(v->type()->as_LongConstant()->value(), false).is_encoded(); - } - return false; -} - - -#endif // AARCH64 LIR_Opr LIRGenerator::safepoint_poll_register() { @@ -211,48 +160,10 @@ } } -#ifdef AARCH64 - -void LIRGenerator::add_constant(LIR_Opr src, jlong c, LIR_Opr dest) { - if (c == 0) { - __ move(src, dest); - return; - } - - BasicType type = src->type(); - 
bool is_neg = (c < 0); - c = ABS(c); - - if ((c >> 24) == 0) { - for (int shift = 0; shift <= 12; shift += 12) { - int part = ((int)c) & (right_n_bits(12) << shift); - if (part != 0) { - if (is_neg) { - __ sub(src, make_constant(type, part), dest); - } else { - __ add(src, make_constant(type, part), dest); - } - src = dest; - } - } - } else { - __ move(make_constant(type, c), dest); - if (is_neg) { - __ sub(src, dest, dest); - } else { - __ add(src, dest, dest); - } - } -} - -#endif // AARCH64 void LIRGenerator::add_large_constant(LIR_Opr src, int c, LIR_Opr dest) { assert(c != 0, "must be"); -#ifdef AARCH64 - add_constant(src, c, dest); -#else // Find first non-zero bit int shift = 0; while ((c & (3 << shift)) == 0) { @@ -272,7 +183,6 @@ if (c & (mask << 24)) { __ add(dest, LIR_OprFact::intConst(c & (mask << 24)), dest); } -#endif // AARCH64 } static LIR_Address* make_address(LIR_Opr base, LIR_Opr index, LIR_Address::Scale scale, BasicType type) { @@ -288,7 +198,6 @@ index = LIR_OprFact::illegalOpr; } -#ifndef AARCH64 if (base->type() == T_LONG) { LIR_Opr tmp = new_register(T_INT); __ convert(Bytecodes::_l2i, base, tmp); @@ -302,26 +211,11 @@ // At this point base and index should be all ints and not constants assert(base->is_single_cpu() && !base->is_constant(), "base should be an non-constant int"); assert(index->is_illegal() || (index->type() == T_INT && !index->is_constant()), "index should be an non-constant int"); -#endif int max_disp; bool disp_is_in_range; bool embedded_shift; -#ifdef AARCH64 - int align = exact_log2(type2aelembytes(type, true)); - assert((disp & right_n_bits(align)) == 0, "displacement is not aligned"); - assert(shift == 0 || shift == align, "shift should be zero or equal to embedded align"); - max_disp = (1 << 12) << align; - - if (disp >= 0) { - disp_is_in_range = Assembler::is_unsigned_imm_in_range(disp, 12, align); - } else { - disp_is_in_range = Assembler::is_imm_in_range(disp, 9, 0); - } - - embedded_shift = true; -#else switch (type) { case T_BYTE: case T_SHORT: @@ -344,7 +238,6 @@ } disp_is_in_range = (-max_disp < disp && disp < max_disp); -#endif // !AARCH64 if (index->is_register()) { LIR_Opr tmp = new_pointer_register(); @@ -394,11 +287,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { assert(type == T_LONG || type == T_INT, "should be"); LIR_Opr r = make_constant(type, x); -#ifdef AARCH64 - bool imm_in_range = Assembler::LogicalImmediate(x, type == T_INT).is_encoded(); -#else bool imm_in_range = AsmOperand::is_rotated_imm(x); -#endif // AARCH64 if (!imm_in_range) { LIR_Opr tmp = new_register(type); __ move(r, tmp); @@ -439,14 +328,9 @@ bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { assert(left != result, "should be different registers"); if (is_power_of_2(c + 1)) { -#ifdef AARCH64 - __ shift_left(left, log2_intptr(c + 1), result); - __ sub(result, left, result); -#else LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c + 1); LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT); __ sub(LIR_OprFact::address(addr), left, result); // rsb with shifted register -#endif // AARCH64 return true; } else if (is_power_of_2(c - 1)) { LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c - 1); @@ -465,12 +349,7 @@ void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) { assert(CardTable::dirty_card_val() == 0, - "Cannot use ZR register (aarch64) or the register containing the card table base address directly (aarch32) otherwise"); -#ifdef AARCH64 - // 
AARCH64 has a register that is constant zero. We can use that one to set the - // value in the card table to dirty. - __ move(FrameMap::ZR_opr, card_addr); -#else // AARCH64 + "Cannot use the register containing the card table base address directly"); if((ci_card_table_address_as() & 0xff) == 0) { // If the card table base address is aligned to 256 bytes, we can use the register // that contains the card_table_base_address. @@ -481,7 +360,6 @@ __ move(LIR_OprFact::intConst(CardTable::dirty_card_val()), tmp_zero); __ move(tmp_zero, card_addr); } -#endif // AARCH64 } void LIRGenerator::CardTableBarrierSet_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base) { @@ -492,24 +370,16 @@ LIR_Opr tmp = FrameMap::LR_ptr_opr; - // TODO-AARCH64: check performance - bool load_card_table_base_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); + bool load_card_table_base_const = VM_Version::supports_movw(); if (load_card_table_base_const) { __ move((LIR_Opr)card_table_base, tmp); } else { __ move(new LIR_Address(FrameMap::Rthread_opr, in_bytes(JavaThread::card_table_base_offset()), T_ADDRESS), tmp); } -#ifdef AARCH64 - LIR_Address* shifted_reg_operand = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTable::card_shift, 0, T_BYTE); - LIR_Opr tmp2 = tmp; - __ add(tmp, LIR_OprFact::address(shifted_reg_operand), tmp2); // tmp2 = tmp + (addr >> CardTable::card_shift) - LIR_Address* card_addr = new LIR_Address(tmp2, T_BYTE); -#else // Use unsigned type T_BOOLEAN here rather than (signed) T_BYTE since signed load // byte instruction does not support the addressing mode we need. LIR_Address* card_addr = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTable::card_shift, 0, T_BOOLEAN); -#endif if (UseCondCardMark) { if (ct->scanned_concurrently()) { __ membar_storeload(); @@ -679,63 +549,6 @@ info = state_for(x); } -#ifdef AARCH64 - LIRItem left(x->x(), this); - LIRItem right(x->y(), this); - LIRItem* left_arg = &left; - LIRItem* right_arg = &right; - - // Test if instr is commutative and if we should swap - if (x->is_commutative() && left.is_constant()) { - left_arg = &right; - right_arg = &left; - } - - left_arg->load_item(); - switch (x->op()) { - case Bytecodes::_ldiv: - right_arg->load_item(); - make_div_by_zero_check(right_arg->result(), T_LONG, info); - __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL); - break; - - case Bytecodes::_lrem: { - right_arg->load_item(); - make_div_by_zero_check(right_arg->result(), T_LONG, info); - // a % b is implemented with 2 instructions: - // tmp = a/b (sdiv) - // res = a - b*tmp (msub) - LIR_Opr tmp = FrameMap::as_long_opr(Rtemp); - __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL); - break; - } - - case Bytecodes::_lmul: - if (right_arg->is_constant() && is_power_of_2_long(right_arg->get_jlong_constant())) { - right_arg->dont_load_item(); - __ shift_left(left_arg->result(), exact_log2_long(right_arg->get_jlong_constant()), rlock_result(x)); - } else { - right_arg->load_item(); - __ mul(left_arg->result(), right_arg->result(), rlock_result(x)); - } - break; - - case Bytecodes::_ladd: - case Bytecodes::_lsub: - if (right_arg->is_constant()) { - jlong c = right_arg->get_jlong_constant(); - add_constant(left_arg->result(), (x->op() == Bytecodes::_ladd) ? 
c : -c, rlock_result(x)); - } else { - right_arg->load_item(); - arithmetic_op_long(x->op(), rlock_result(x), left_arg->result(), right_arg->result(), NULL); - } - break; - - default: - ShouldNotReachHere(); - return; - } -#else switch (x->op()) { case Bytecodes::_ldiv: case Bytecodes::_lrem: { @@ -777,7 +590,6 @@ default: ShouldNotReachHere(); } -#endif // AARCH64 } @@ -804,20 +616,6 @@ LIR_Opr result = rlock_result(x); __ idiv(left_arg->result(), right_arg->result(), result, tmp, info); } else { -#ifdef AARCH64 - left_arg->load_item(); - right_arg->load_item(); - make_div_by_zero_check(right_arg->result(), T_INT, info); - if (x->op() == Bytecodes::_idiv) { - __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL); - } else { - // a % b is implemented with 2 instructions: - // tmp = a/b (sdiv) - // res = a - b*tmp (msub) - LIR_Opr tmp = FrameMap::as_opr(Rtemp); - __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL); - } -#else left_arg->load_item_force(FrameMap::R0_opr); right_arg->load_item_force(FrameMap::R2_opr); LIR_Opr tmp = FrameMap::R1_opr; @@ -831,16 +629,8 @@ __ idiv(left_arg->result(), right_arg->result(), out_reg, tmp, info); } __ move(out_reg, result); -#endif // AARCH64 } -#ifdef AARCH64 - } else if (((x->op() == Bytecodes::_iadd) || (x->op() == Bytecodes::_isub)) && right_arg->is_constant()) { - left_arg->load_item(); - jint c = right_arg->get_jint_constant(); - right_arg->dont_load_item(); - add_constant(left_arg->result(), (x->op() == Bytecodes::_iadd) ? c : -c, rlock_result(x)); -#endif // AARCH64 } else { left_arg->load_item(); @@ -852,7 +642,6 @@ right_arg->load_item(); } } else { - AARCH64_ONLY(assert(!right_arg->is_constant(), "constant right_arg is already handled by this moment");) right_arg->load_nonconstant(); } rlock_result(x); @@ -880,11 +669,9 @@ LIRItem value(x->x(), this); LIRItem count(x->y(), this); -#ifndef AARCH64 if (value.type()->is_long()) { count.set_destroys_register(); } -#endif // !AARCH64 if (count.is_constant()) { assert(count.type()->as_IntConstant() != NULL, "should be"); @@ -906,15 +693,7 @@ left.load_item(); -#ifdef AARCH64 - if (right.is_constant() && can_inline_as_constant_in_logic(right.value())) { - right.dont_load_item(); - } else { - right.load_item(); - } -#else right.load_nonconstant(); -#endif // AARCH64 logic_op(x->op(), rlock_result(x), left.result(), right.result()); } @@ -956,15 +735,7 @@ LIRItem right(x->y(), this); left.load_item(); -#ifdef AARCH64 - if (right.is_constant() && can_inline_as_constant_in_cmp(right.value())) { - right.dont_load_item(); - } else { - right.load_item(); - } -#else right.load_nonconstant(); -#endif // AARCH64 LIR_Opr reg = rlock_result(x); @@ -987,19 +758,11 @@ cmp_value.load_item(); LIR_Opr result = new_register(T_INT); if (type == T_OBJECT || type == T_ARRAY) { -#ifdef AARCH64 - if (UseCompressedOops) { - tmp1 = new_pointer_register(); - tmp2 = new_pointer_register(); - } -#endif __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); } else if (type == T_INT) { __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), tmp1, tmp1, result); } else if (type == T_LONG) { -#ifndef AARCH64 tmp1 = new_register(T_LONG); -#endif // !AARCH64 __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), tmp1, tmp2, result); } else { ShouldNotReachHere(); @@ -1135,7 +898,6 @@ void LIRGenerator::do_Convert(Convert* x) { address runtime_func; switch 
(x->op()) { -#ifndef AARCH64 case Bytecodes::_l2f: runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); break; @@ -1170,7 +932,6 @@ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2i); break; #endif // __SOFTFP__ -#endif // !AARCH64 default: { LIRItem value(x->value(), this); value.load_item(); @@ -1488,7 +1249,6 @@ LIRItem* yin = &yitem; If::Condition cond = x->cond(); -#ifndef AARCH64 if (tag == longTag) { if (cond == If::gtr || cond == If::leq) { cond = Instruction::mirror(cond); @@ -1497,20 +1257,11 @@ } xin->set_destroys_register(); } -#endif // !AARCH64 xin->load_item(); LIR_Opr left = xin->result(); LIR_Opr right; -#ifdef AARCH64 - if (yin->is_constant() && can_inline_as_constant_in_cmp(yin->value())) { - yin->dont_load_item(); - } else { - yin->load_item(); - } - right = yin->result(); -#else if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) { // inline long zero @@ -1519,7 +1270,6 @@ yin->load_nonconstant(); right = yin->result(); } -#endif // AARCH64 set_no_result(x); @@ -1558,7 +1308,6 @@ void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, CodeEmitInfo* info) { -#ifndef AARCH64 if (value->is_double_cpu()) { assert(address->index()->is_illegal(), "should have a constant displacement"); LIR_Opr tmp = new_pointer_register(); @@ -1566,14 +1315,11 @@ __ volatile_store_mem_reg(value, new LIR_Address(tmp, (intx)0, address->type()), info); return; } -#endif // !AARCH64 - // TODO-AARCH64 implement with stlr instruction __ store(value, address, info, lir_patch_none); } void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, CodeEmitInfo* info) { -#ifndef AARCH64 if (result->is_double_cpu()) { assert(address->index()->is_illegal(), "should have a constant displacement"); LIR_Opr tmp = new_pointer_register(); @@ -1581,7 +1327,5 @@ __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, address->type()), result, info); return; } -#endif // !AARCH64 - // TODO-AARCH64 implement with ldar instruction __ load(address, result, info, lir_patch_none); } --- old/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp 2018-09-17 10:29:43.838667698 -0400 +++ new/src/hotspot/cpu/arm/c1_LIRGenerator_arm.hpp 2018-09-17 10:29:43.202630549 -0400 @@ -27,7 +27,3 @@ void make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info); -#ifdef AARCH64 - // the helper for arithmetic - void add_constant(LIR_Opr src, jlong c, LIR_Opr dest); -#endif // AARCH64 --- old/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2018-09-17 10:29:45.402759053 -0400 +++ new/src/hotspot/cpu/arm/c1_LIR_arm.cpp 2018-09-17 10:29:44.766721904 -0400 @@ -33,17 +33,6 @@ return as_FloatRegister(fpu_regnrLo()); } -#ifdef AARCH64 -// Reg2 unused. 
-LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { - assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); - return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | - (reg1 << LIR_OprDesc::reg2_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::fpu_register | - LIR_OprDesc::double_size); -} -#else LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { assert(as_FloatRegister(reg2) != fnoreg, "Arm32 holds double in two regs."); return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | @@ -52,22 +41,12 @@ LIR_OprDesc::fpu_register | LIR_OprDesc::double_size); } -#endif #ifndef PRODUCT void LIR_Address::verify() const { #ifdef _LP64 assert(base()->is_cpu_register(), "wrong base operand"); #endif -#ifdef AARCH64 - if (base()->type() == T_INT) { - assert(index()->is_single_cpu() && (index()->type() == T_INT), "wrong index operand"); - } else { - assert(index()->is_illegal() || index()->is_double_cpu() || - (index()->is_single_cpu() && (index()->is_oop_register() || index()->type() == T_INT)), "wrong index operand"); - assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); - } -#else assert(disp() == 0 || index()->is_illegal(), "can't have both"); // Note: offsets higher than 4096 must not be rejected here. They can // be handled by the back-end or will be rejected if not. @@ -81,6 +60,5 @@ assert(base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA, "wrong type for addresses"); #endif -#endif // AARCH64 } #endif // PRODUCT --- old/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp 2018-09-17 10:29:46.986851576 -0400 +++ new/src/hotspot/cpu/arm/c1_LinearScan_arm.hpp 2018-09-17 10:29:46.341813901 -0400 @@ -31,24 +31,17 @@ } inline int LinearScan::num_physical_regs(BasicType type) { -#ifndef AARCH64 if (type == T_LONG || type == T_DOUBLE) return 2; -#endif // !AARCH64 return 1; } inline bool LinearScan::requires_adjacent_regs(BasicType type) { -#ifdef AARCH64 - return false; -#else return type == T_DOUBLE || type == T_LONG; -#endif // AARCH64 } inline bool LinearScan::is_caller_save(int assigned_reg) { assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); - // TODO-AARCH64 try to add callee-saved registers return true; } --- old/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp 2018-09-17 10:29:48.567943924 -0400 +++ new/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp 2018-09-17 10:29:47.928906599 -0400 @@ -46,11 +46,7 @@ load_klass(Rtemp, receiver); cmp(Rtemp, iCache); b(verified, eq); // jump over alignment no-ops -#ifdef AARCH64 - jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); -#else jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); -#endif align(CodeEntryAlignment); bind(verified); } @@ -59,10 +55,6 @@ assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); assert((frame_size_in_bytes % StackAlignmentInBytes) == 0, "frame size should be aligned"); -#ifdef AARCH64 - // Extra nop for MT-safe patching in NativeJump::patch_verified_entry - nop(); -#endif // AARCH64 arm_stack_overflow_check(bang_size_in_bytes, Rtemp); @@ -104,28 +96,12 @@ mov(tmp, (intptr_t)markOopDesc::prototype()); } -#ifdef AARCH64 - if (UseCompressedClassPointers) { - str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); - encode_klass_not_null(tmp, klass); // Take care not to kill klass - str_w(tmp, Address(obj, oopDesc::klass_offset_in_bytes())); - } else { - 
assert(oopDesc::mark_offset_in_bytes() + wordSize == oopDesc::klass_offset_in_bytes(), "adjust this code"); - stp(tmp, klass, Address(obj, oopDesc::mark_offset_in_bytes())); - } -#else str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); -#endif // AARCH64 if (len->is_valid()) { str_32(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); } -#ifdef AARCH64 - else if (UseCompressedClassPointers) { - store_klass_gap(obj); - } -#endif // AARCH64 } @@ -146,40 +122,6 @@ const Register ptr = tmp2; if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { -#ifdef AARCH64 - if (obj_size_in_bytes < 0) { - add_rc(ptr, obj, header_size); - initialize_body(ptr, obj_end, tmp1); - - } else { - int base = instanceOopDesc::header_size() * HeapWordSize; - assert(obj_size_in_bytes >= base, "should be"); - - const int zero_bytes = obj_size_in_bytes - base; - assert((zero_bytes % wordSize) == 0, "should be"); - - if ((zero_bytes % (2*wordSize)) != 0) { - str(ZR, Address(obj, base)); - base += wordSize; - } - - const int stp_count = zero_bytes / (2*wordSize); - - if (zero_bytes > 8 * wordSize) { - Label loop; - add(ptr, obj, base); - mov(tmp1, stp_count); - bind(loop); - subs(tmp1, tmp1, 1); - stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); - b(loop, gt); - } else { - for (int i = 0; i < stp_count; i++) { - stp(ZR, ZR, Address(obj, base + i * 2 * wordSize)); - } - } - } -#else if (obj_size_in_bytes >= 0 && obj_size_in_bytes <= 8 * BytesPerWord) { mov(tmp1, 0); const int base = instanceOopDesc::header_size() * HeapWordSize; @@ -191,7 +133,6 @@ add(ptr, obj, header_size); initialize_body(ptr, obj_end, tmp1); } -#endif // AARCH64 } // StoreStore barrier required after complete initialization @@ -228,12 +169,7 @@ const int scale_shift = exact_log2(element_size); const Register obj_size = Rtemp; // Rtemp should be free at c1 LIR level -#ifdef AARCH64 - mov_slow(Rtemp, max_array_allocation_length); - cmp_32(len, Rtemp); -#else cmp_32(len, max_array_allocation_length); -#endif // AARCH64 b(slow_case, hs); bool align_header = ((header_size_in_bytes | element_size) & MinObjAlignmentInBytesMask) != 0; @@ -272,34 +208,6 @@ assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); -#ifdef AARCH64 - - str(obj, Address(disp_hdr, obj_offset)); - - if (!UseBiasedLocking) { - null_check_offset = offset(); - } - ldr(hdr, obj); - - // Test if object is already locked - assert(markOopDesc::unlocked_value == 1, "adjust this code"); - tbnz(hdr, exact_log2(markOopDesc::unlocked_value), fast_lock); - - // Check for recursive locking - // See comments in InterpreterMacroAssembler::lock_object for - // explanations on the fast recursive locking check. 
- intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); - Assembler::LogicalImmediate imm(mask, false); - mov(tmp2, SP); - sub(tmp2, hdr, tmp2); - ands(tmp2, tmp2, imm); - b(slow_case, ne); - - // Recursive locking: store 0 into a lock record - str(ZR, Address(disp_hdr, mark_offset)); - b(fast_lock_done); - -#else // AARCH64 if (!UseBiasedLocking) { null_check_offset = offset(); @@ -329,7 +237,6 @@ // else need slow case b(slow_case); -#endif // AARCH64 bind(fast_lock); // Save previous object header in BasicLock structure and update the header --- old/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp 2018-09-17 10:29:50.165037206 -0400 +++ new/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp 2018-09-17 10:29:49.507998829 -0400 @@ -80,15 +80,8 @@ // Runtime1::exception_handler_for_pc if (_stub_id != Runtime1::forward_exception_id) { assert(frame_size() != no_frame_size, "cannot directly call forward_exception_id"); -#ifdef AARCH64 - Label skip; - cbz(R3, skip); - jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp); - bind(skip); -#else cmp(R3, 0); jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne); -#endif // AARCH64 } else { #ifdef ASSERT // Should not have pending exception in forward_exception stub @@ -124,43 +117,6 @@ #define __ sasm-> // TODO: ARM - does this duplicate RegisterSaver in SharedRuntime? -#ifdef AARCH64 - - // - // On AArch64 registers save area has the following layout: - // - // |---------------------| - // | return address (LR) | - // | FP | - // |---------------------| - // | D31 | - // | ... | - // | D0 | - // |---------------------| - // | padding | - // |---------------------| - // | R28 | - // | ... | - // | R0 | - // |---------------------| <-- SP - // - -enum RegisterLayout { - number_of_saved_gprs = 29, - number_of_saved_fprs = FloatRegisterImpl::number_of_registers, - - R0_offset = 0, - D0_offset = R0_offset + number_of_saved_gprs + 1, - FP_offset = D0_offset + number_of_saved_fprs, - LR_offset = FP_offset + 1, - - reg_save_size = LR_offset + 1, - - arg1_offset = reg_save_size * wordSize, - arg2_offset = (reg_save_size + 1) * wordSize -}; - -#else enum RegisterLayout { fpu_save_size = pd_nof_fpu_regs_reg_alloc, @@ -191,7 +147,6 @@ arg2_offset = (reg_save_size + 1) * wordSize }; -#endif // AARCH64 static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) { sasm->set_frame_size(reg_save_size /* in words */); @@ -200,19 +155,6 @@ // Locations are offsets from sp after runtime call. 
OopMap* map = new OopMap(VMRegImpl::slots_per_word * reg_save_size, 0); -#ifdef AARCH64 - for (int i = 0; i < number_of_saved_gprs; i++) { - map->set_callee_saved(VMRegImpl::stack2reg((R0_offset + i) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg()); - } - map->set_callee_saved(VMRegImpl::stack2reg(FP_offset * VMRegImpl::slots_per_word), FP->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(LR_offset * VMRegImpl::slots_per_word), LR->as_VMReg()); - - if (save_fpu_registers) { - for (int i = 0; i < number_of_saved_fprs; i++) { - map->set_callee_saved(VMRegImpl::stack2reg((D0_offset + i) * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg()); - } - } -#else int j=0; for (int i = R0_offset; i < R10_offset; i++) { if (j == FP_REG_NUM) { @@ -235,7 +177,6 @@ map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg()); } } -#endif // AARCH64 return map; } @@ -244,29 +185,6 @@ __ block_comment("save_live_registers"); sasm->set_frame_size(reg_save_size /* in words */); -#ifdef AARCH64 - assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned"); - - __ raw_push(FP, LR); - - __ sub(SP, SP, (reg_save_size - 2) * wordSize); - - for (int i = 0; i < align_down((int)number_of_saved_gprs, 2); i += 2) { - __ stp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize)); - } - - if (is_odd(number_of_saved_gprs)) { - int i = number_of_saved_gprs - 1; - __ str(as_Register(i), Address(SP, (R0_offset + i) * wordSize)); - } - - if (save_fpu_registers) { - assert (is_even(number_of_saved_fprs), "adjust this code"); - for (int i = 0; i < number_of_saved_fprs; i += 2) { - __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize)); - } - } -#else __ push(RegisterSet(FP) | RegisterSet(LR)); __ push(RegisterSet(R0, R6) | RegisterSet(R8, R10) | R12 | altFP_7_11); if (save_fpu_registers) { @@ -274,7 +192,6 @@ } else { __ sub(SP, SP, fpu_save_size * wordSize); } -#endif // AARCH64 return generate_oop_map(sasm, save_fpu_registers); } @@ -287,34 +204,6 @@ bool restore_fpu_registers = HaveVFP) { __ block_comment("restore_live_registers"); -#ifdef AARCH64 - if (restore_R0) { - __ ldr(R0, Address(SP, R0_offset * wordSize)); - } - - assert(is_odd(number_of_saved_gprs), "adjust this code"); - for (int i = 1; i < number_of_saved_gprs; i += 2) { - __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize)); - } - - if (restore_fpu_registers) { - assert (is_even(number_of_saved_fprs), "adjust this code"); - for (int i = 0; i < number_of_saved_fprs; i += 2) { - __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize)); - } - } - - __ add(SP, SP, (reg_save_size - 2) * wordSize); - - if (restore_FP_LR) { - __ raw_pop(FP, LR); - if (do_return) { - __ ret(); - } - } else { - assert (!do_return, "return without restoring FP/LR"); - } -#else if (restore_fpu_registers) { __ fldmiad(SP, FloatRegisterSet(D0, fpu_save_size / 2), writeback); if (!restore_R0) { @@ -329,7 +218,6 @@ } else { assert (!do_return, "return without restoring FP/LR"); } -#endif // AARCH64 } @@ -341,11 +229,9 @@ restore_live_registers(sasm, true, true, true, restore_fpu_registers); } -#ifndef AARCH64 static void restore_live_registers_except_FP_LR(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { restore_live_registers(sasm, true, false, false, restore_fpu_registers); } -#endif // !AARCH64 static void restore_live_registers_without_return(StubAssembler* sasm, bool 
restore_fpu_registers = HaveVFP) { restore_live_registers(sasm, true, true, false, restore_fpu_registers); @@ -386,15 +272,8 @@ static void restore_sp_for_method_handle(StubAssembler* sasm) { // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site. __ ldr_s32(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset())); -#ifdef AARCH64 - Label skip; - __ cbz(Rtemp, skip); - __ mov(SP, Rmh_SP_save); - __ bind(skip); -#else __ cmp(Rtemp, 0); __ mov(SP, Rmh_SP_save, ne); -#endif // AARCH64 } @@ -500,22 +379,12 @@ __ cmp_32(R0, 0); -#ifdef AARCH64 - Label call_deopt; - - restore_live_registers_without_return(sasm); - __ b(call_deopt, ne); - __ ret(); - - __ bind(call_deopt); -#else restore_live_registers_except_FP_LR(sasm); __ pop(RegisterSet(FP) | RegisterSet(PC), eq); // Deoptimization needed // TODO: ARM - no need to restore FP & LR because unpack_with_reexecution() stores them back __ pop(RegisterSet(FP) | RegisterSet(LR)); -#endif // AARCH64 __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp); @@ -623,12 +492,7 @@ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { Label slow_case, slow_case_no_pop; -#ifdef AARCH64 - __ mov_slow(Rtemp, C1_MacroAssembler::max_array_allocation_length); - __ cmp_32(length, Rtemp); -#else __ cmp_32(length, C1_MacroAssembler::max_array_allocation_length); -#endif // AARCH64 __ b(slow_case_no_pop, hs); // Free some temporary registers @@ -645,12 +509,7 @@ __ mov(arr_size, MinObjAlignmentInBytesMask); __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift)); -#ifdef AARCH64 - __ lslv_w(tmp3, length, tmp1); - __ add(arr_size, arr_size, tmp3); -#else __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1)); -#endif // AARCH64 __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift)); __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes); @@ -716,15 +575,8 @@ __ load_klass(Rtemp, R0); __ ldr_u32(Rtemp, Address(Rtemp, Klass::access_flags_offset())); -#ifdef AARCH64 - Label L; - __ tbnz(Rtemp, exact_log2(JVM_ACC_HAS_FINALIZER), L); - __ ret(); - __ bind(L); -#else __ tst(Rtemp, JVM_ACC_HAS_FINALIZER); __ bx(LR, eq); -#endif // AARCH64 // Call VM OopMap* map = save_live_registers(sasm); @@ -746,9 +598,6 @@ case throw_index_exception_id: { __ set_info("index_range_check_failed", dont_gc_arguments); -#ifdef AARCH64 - __ NOT_TESTED(); -#endif oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); } break; @@ -806,9 +655,6 @@ case throw_incompatible_class_change_error_id: { __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); -#ifdef AARCH64 - __ NOT_TESTED(); -#endif oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; @@ -892,7 +738,7 @@ restore_live_registers_without_return(sasm); DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); assert(deopt_blob != NULL, "deoptimization blob must have been created"); - __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)); + __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, noreg); } break; --- old/src/hotspot/cpu/arm/c2_globals_arm.hpp 2018-09-17 10:29:51.752129905 -0400 +++ new/src/hotspot/cpu/arm/c2_globals_arm.hpp 2018-09-17 10:29:51.114092638 -0400 @@ -39,27 +39,15 @@ define_pd_global(bool, ProfileTraps, true); define_pd_global(bool, 
UseOnStackReplacement, true); define_pd_global(bool, ProfileInterpreter, true); -#ifdef AARCH64 -define_pd_global(bool, TieredCompilation, trueInTiered); -#else define_pd_global(bool, TieredCompilation, false); -#endif define_pd_global(intx, CompileThreshold, 10000); define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, ConditionalMoveLimit, 4); // C2 gets to use all the float/double registers -#ifdef AARCH64 -define_pd_global(intx, FLOATPRESSURE, 31); -#else define_pd_global(intx, FLOATPRESSURE, 30); -#endif define_pd_global(intx, FreqInlineSize, 175); -#ifdef AARCH64 -define_pd_global(intx, INTPRESSURE, 27); -#else define_pd_global(intx, INTPRESSURE, 12); -#endif define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); // The default setting 16/16 seems to work best. --- old/src/hotspot/cpu/arm/frame_arm.cpp 2018-09-17 10:29:53.324221727 -0400 +++ new/src/hotspot/cpu/arm/frame_arm.cpp 2018-09-17 10:29:52.687184519 -0400 @@ -304,26 +304,12 @@ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; } -#ifdef AARCH64 - -// Used by template based interpreter deoptimization -void frame::interpreter_frame_set_stack_top(intptr_t* stack_top) { - *((intptr_t**)addr_at(interpreter_frame_stack_top_offset)) = stack_top; -} - -// Used by template based interpreter deoptimization -void frame::interpreter_frame_set_extended_sp(intptr_t* sp) { - *((intptr_t**)addr_at(interpreter_frame_extended_sp_offset)) = sp; -} - -#else // Used by template based interpreter deoptimization void frame::interpreter_frame_set_last_sp(intptr_t* sp) { *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; } -#endif // AARCH64 frame frame::sender_for_entry_frame(RegisterMap* map) const { assert(map != NULL, "map must be set"); @@ -334,18 +320,12 @@ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); map->clear(); assert(map->include_argument_oops(), "should be set by clear"); -#ifdef AARCH64 - assert (jfa->last_Java_pc() != NULL, "pc should be stored"); - frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); - return fr; -#else if (jfa->last_Java_pc() != NULL) { frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); return fr; } frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); return fr; -#endif // AARCH64 } //------------------------------------------------------------------------------ @@ -403,10 +383,6 @@ void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { // see x86 for comments map->set_location(FP->as_VMReg(), (address) link_addr); -#ifdef AARCH64 - // also adjust a high part of register - map->set_location(FP->as_VMReg()->next(), (address) link_addr); -#endif // AARCH64 } frame frame::sender_for_interpreter_frame(RegisterMap* map) const { @@ -539,14 +515,6 @@ if (method->is_native()) { // Prior to calling into the runtime to report the method_exit both of // the possible return value registers are saved. 
-#ifdef AARCH64 - // Return value registers are saved into the frame - if (type == T_FLOAT || type == T_DOUBLE) { - res_addr = addr_at(interpreter_frame_fp_saved_result_offset); - } else { - res_addr = addr_at(interpreter_frame_gp_saved_result_offset); - } -#else // Return value registers are pushed to the native stack res_addr = (intptr_t*)sp(); #ifdef __ABI_HARD__ @@ -555,7 +523,6 @@ res_addr += 2; } #endif // __ABI_HARD__ -#endif // AARCH64 } else { res_addr = (intptr_t*)interpreter_frame_tos_address(); } @@ -602,12 +569,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) { if (is_interpreted_frame()) { DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -#ifdef AARCH64 - DESCRIBE_FP_OFFSET(interpreter_frame_stack_top); - DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp); -#else DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -#endif // AARCH64 DESCRIBE_FP_OFFSET(interpreter_frame_method); DESCRIBE_FP_OFFSET(interpreter_frame_mdp); DESCRIBE_FP_OFFSET(interpreter_frame_cache); @@ -631,7 +593,6 @@ } intptr_t* frame::real_fp() const { -#ifndef AARCH64 if (is_entry_frame()) { // Work-around: FP (currently) does not conform to the ABI for entry // frames (see generate_call_stub). Might be worth fixing as another CR. @@ -644,7 +605,6 @@ #endif return new_fp; } -#endif // !AARCH64 if (_cb != NULL) { // use the frame size if valid int size = _cb->frame_size(); --- old/src/hotspot/cpu/arm/frame_arm.hpp 2018-09-17 10:29:54.895313490 -0400 +++ new/src/hotspot/cpu/arm/frame_arm.hpp 2018-09-17 10:29:54.257276224 -0400 @@ -37,22 +37,12 @@ sender_sp_offset = 2, // Interpreter frames -#ifdef AARCH64 - interpreter_frame_gp_saved_result_offset = 4, // for native calls only - interpreter_frame_fp_saved_result_offset = 3, // for native calls only -#endif interpreter_frame_oop_temp_offset = 2, // for native calls only interpreter_frame_sender_sp_offset = -1, -#ifdef AARCH64 - interpreter_frame_stack_top_offset = interpreter_frame_sender_sp_offset - 1, - interpreter_frame_extended_sp_offset = interpreter_frame_stack_top_offset - 1, - interpreter_frame_method_offset = interpreter_frame_extended_sp_offset - 1, -#else // outgoing sp before a call to an invoked method interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -#endif // AARCH64 interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, @@ -64,7 +54,7 @@ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, // Entry frames - entry_frame_call_wrapper_offset = AARCH64_ONLY(2) NOT_AARCH64(0) + entry_frame_call_wrapper_offset = 0 }; intptr_t ptr_at(int offset) const { @@ -107,9 +97,7 @@ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); -#ifndef AARCH64 frame(intptr_t* sp, intptr_t* fp); -#endif // !AARCH64 void init(intptr_t* sp, intptr_t* fp, address pc); @@ -119,18 +107,11 @@ inline address* sender_pc_addr() const; -#ifdef AARCH64 - // Used by template based interpreter deoptimization - void interpreter_frame_set_stack_top(intptr_t* stack_top); - void interpreter_frame_set_extended_sp(intptr_t* sp); - -#else // expression stack tos if we are nested in a java call intptr_t* interpreter_frame_last_sp() const; // deoptimization support void interpreter_frame_set_last_sp(intptr_t* sp); -#endif // AARCH64 // helper to update a map with callee-saved FP static void 
update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); --- old/src/hotspot/cpu/arm/frame_arm.inline.hpp 2018-09-17 10:29:56.465405196 -0400 +++ new/src/hotspot/cpu/arm/frame_arm.inline.hpp 2018-09-17 10:29:55.832368222 -0400 @@ -83,7 +83,6 @@ } } -#ifndef AARCH64 inline frame::frame(intptr_t* sp, intptr_t* fp) { _sp = sp; @@ -104,7 +103,6 @@ } } -#endif // !AARCH64 // Accessors @@ -148,11 +146,9 @@ return (intptr_t**)addr_at(interpreter_frame_locals_offset); } -#ifndef AARCH64 inline intptr_t* frame::interpreter_frame_last_sp() const { return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); } -#endif // !AARCH64 inline intptr_t* frame::interpreter_frame_bcp_addr() const { return (intptr_t*)addr_at(interpreter_frame_bcp_offset); @@ -181,12 +177,6 @@ // top of expression stack inline intptr_t* frame::interpreter_frame_tos_address() const { -#ifdef AARCH64 - intptr_t* stack_top = (intptr_t*)*addr_at(interpreter_frame_stack_top_offset); - assert(stack_top != NULL, "should be stored before call"); - assert(stack_top <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); - return stack_top; -#else intptr_t* last_sp = interpreter_frame_last_sp(); if (last_sp == NULL ) { return sp(); @@ -197,7 +187,6 @@ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); return last_sp; } -#endif // AARCH64 } inline oop* frame::interpreter_frame_temp_oop_addr() const { --- old/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp 2018-09-17 10:29:58.039497135 -0400 +++ new/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp 2018-09-17 10:29:57.403459985 -0400 @@ -60,27 +60,16 @@ BLOCK_COMMENT("PreBarrier"); -#ifdef AARCH64 - callee_saved_regs = align_up(callee_saved_regs, 2); - for (int i = 0; i < callee_saved_regs; i += 2) { - __ raw_push(as_Register(i), as_Register(i+1)); - } -#else RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1)); __ push(saved_regs | R9ifScratched); -#endif // AARCH64 if (addr != R0) { assert_different_registers(count, R0); __ mov(R0, addr); } -#ifdef AARCH64 - __ zero_extend(R1, count, 32); // G1BarrierSetRuntime::write_ref_array_pre_*_entry takes size_t -#else if (count != R1) { __ mov(R1, count); } -#endif // AARCH64 if (UseCompressedOops) { __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry)); @@ -88,13 +77,7 @@ __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry)); } -#ifdef AARCH64 - for (int i = callee_saved_regs - 2; i >= 0; i -= 2) { - __ raw_pop(as_Register(i), as_Register(i+1)); - } -#else __ pop(saved_regs | R9ifScratched); -#endif // AARCH64 } } @@ -106,9 +89,6 @@ assert_different_registers(count, R0); __ mov(R0, addr); } -#ifdef AARCH64 - __ zero_extend(R1, count, 32); // G1BarrierSetRuntime::write_ref_array_post_entry takes size_t -#else if (count != R1) { __ mov(R1, count); } @@ -120,17 +100,14 @@ // difficult for this particular call site. __ push(R9); #endif // !R9_IS_SCRATCHED -#endif // !AARCH64 __ call(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry)); -#ifndef AARCH64 #if R9_IS_SCRATCHED __ pop(R9); #endif // !R9_IS_SCRATCHED -#endif // !AARCH64 } // G1 pre-barrier. -// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +// Blows all volatile registers R0-R3, Rtemp, LR. // If store_addr != noreg, then previous value is loaded from [store_addr]; // in such case store_addr and new_val registers are preserved; // otherwise pre_val register is preserved.
@@ -186,20 +163,12 @@ __ bind(runtime); // save the live input values -#ifdef AARCH64 - if (store_addr != noreg) { - __ raw_push(store_addr, new_val); - } else { - __ raw_push(pre_val, ZR); - } -#else if (store_addr != noreg) { // avoid raw_push to support any ordering of store_addr and new_val __ push(RegisterSet(store_addr) | RegisterSet(new_val)); } else { __ push(pre_val); } -#endif // AARCH64 if (pre_val != R0) { __ mov(R0, pre_val); @@ -208,25 +177,17 @@ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1); -#ifdef AARCH64 - if (store_addr != noreg) { - __ raw_pop(store_addr, new_val); - } else { - __ raw_pop(pre_val, ZR); - } -#else if (store_addr != noreg) { __ pop(RegisterSet(store_addr) | RegisterSet(new_val)); } else { __ pop(pre_val); } -#endif // AARCH64 __ bind(done); } // G1 post-barrier. -// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +// Blows all volatile registers R0-R3, Rtemp, LR. void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, @@ -246,13 +207,8 @@ // Does store cross heap regions? __ eor(tmp1, store_addr, new_val); -#ifdef AARCH64 - __ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); - __ cbz(tmp1, done); -#else __ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); __ b(done, eq); -#endif // crosses regions, storing NULL? @@ -333,12 +289,8 @@ const Register store_addr = obj.base(); if (obj.index() != noreg) { assert (obj.disp() == 0, "index or displacement, not both"); -#ifdef AARCH64 - __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm()); -#else assert(obj.offset_op() == add_offset, "addition is expected"); __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm())); -#endif // AARCH64 } else if (obj.disp() != 0) { __ add(store_addr, obj.base(), obj.disp()); } @@ -415,16 +367,10 @@ __ set_info("g1_pre_barrier_slow_id", false); // save at least the registers that need saving if the runtime is called -#ifdef AARCH64 - __ raw_push(R0, R1); - __ raw_push(R2, R3); - const int nb_saved_regs = 4; -#else // AARCH64 const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR); const int nb_saved_regs = 6; assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs"); __ push(saved_regs); -#endif // AARCH64 const Register r_pre_val_0 = R0; // must be R0, to be ready for the runtime call const Register r_index_1 = R1; @@ -454,12 +400,7 @@ __ bind(done); -#ifdef AARCH64 - __ raw_pop(R2, R3); - __ raw_pop(R0, R1); -#else // AARCH64 __ pop(saved_regs); -#endif // AARCH64 __ ret(); @@ -492,16 +433,10 @@ AddressLiteral cardtable(ci_card_table_address_as
(), relocInfo::none); // save at least the registers that need saving if the runtime is called -#ifdef AARCH64 - __ raw_push(R0, R1); - __ raw_push(R2, R3); - const int nb_saved_regs = 4; -#else // AARCH64 const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR); const int nb_saved_regs = 6; assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs"); __ push(saved_regs); -#endif // AARCH64 const Register r_card_addr_0 = R0; // must be R0 for the slow case const Register r_obj_0 = R0; @@ -528,12 +463,7 @@ __ bind(done); -#ifdef AARCH64 - __ raw_pop(R2, R3); - __ raw_pop(R0, R1); -#else // AARCH64 __ pop(saved_regs); -#endif // AARCH64 __ ret(); --- old/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp 2018-09-17 10:29:59.624589716 -0400 +++ new/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp 2018-09-17 10:29:58.974551749 -0400 @@ -35,12 +35,6 @@ case T_OBJECT: case T_ARRAY: { if (in_heap) { -#ifdef AARCH64 - if (UseCompressedOops) { - __ ldr_w(dst, src); - __ decode_heap_oop(dst); - } else -#endif // AARCH64 { __ ldr(dst, src); } @@ -57,13 +51,9 @@ case T_INT: __ ldr_s32 (dst, src); break; case T_ADDRESS: __ ldr (dst, src); break; case T_LONG: -#ifdef AARCH64 - __ ldr (dst, src); break; -#else assert(dst == noreg, "only to ltos"); __ add (src.index(), src.index(), src.base()); __ ldmia (src.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi)); -#endif // AARCH64 break; #ifdef __SOFTFP__ case T_FLOAT: @@ -100,15 +90,6 @@ case T_OBJECT: case T_ARRAY: { if (in_heap) { -#ifdef AARCH64 - if (UseCompressedOops) { - assert(!dst.uses(src), "not enough registers"); - if (!is_null) { - __ encode_heap_oop(src); - } - __ str_w(val, obj); - } else -#endif // AARCH64 { __ str(val, obj); } @@ -128,13 +109,9 @@ case T_INT: __ str (val, obj); break; case T_ADDRESS: __ str (val, obj); break; case T_LONG: -#ifdef AARCH64 - __ str (val, obj); break; -#else // AARCH64 assert(val == noreg, "only tos"); __ add (obj.index(), obj.index(), obj.base()); __ stmia (obj.index(), RegisterSet(R0_tos_lo) | RegisterSet(R1_tos_hi)); -#endif // AARCH64 break; #ifdef __SOFTFP__ case T_FLOAT: --- old/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp 2018-09-17 10:30:01.328689249 -0400 +++ new/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp 2018-09-17 10:30:00.612647426 -0400 @@ -119,7 +119,6 @@ Possible cause is a cache miss (card table base address resides in a rarely accessed area of thread descriptor). 
*/ - // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64 __ mov_address(card_table_base, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference); } @@ -136,12 +135,7 @@ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code"); assert(CardTable::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations."); -#ifdef AARCH64 - add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTable::card_shift)); - Address card_table_addr(card_table_base); -#else Address card_table_addr(card_table_base, obj, lsr, CardTable::card_shift); -#endif if (UseCondCardMark) { if (ct->scanned_concurrently()) { @@ -164,9 +158,6 @@ } void CardTableBarrierSetAssembler::set_card(MacroAssembler* masm, Register card_table_base, Address card_table_addr, Register tmp) { -#ifdef AARCH64 - strb(ZR, card_table_addr); -#else CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set()); CardTable* ct = ctbs->card_table(); if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) { @@ -178,5 +169,4 @@ __ mov(tmp, 0); __ strb(tmp, card_table_addr); } -#endif // AARCH64 } --- old/src/hotspot/cpu/arm/globalDefinitions_arm.hpp 2018-09-17 10:30:03.059790358 -0400 +++ new/src/hotspot/cpu/arm/globalDefinitions_arm.hpp 2018-09-17 10:30:02.425753325 -0400 @@ -25,19 +25,7 @@ #ifndef CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP #define CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP -#ifdef AARCH64 -#define AARCH64_ONLY(code) code -#define AARCH64_ONLY_ARG(arg) , arg -#define NOT_AARCH64(code) -#define NOT_AARCH64_ARG(arg) -#else -#define AARCH64_ONLY(code) -#define AARCH64_ONLY_ARG(arg) -#define NOT_AARCH64(code) code -#define NOT_AARCH64_ARG(arg) , arg -#endif - -const int StackAlignmentInBytes = AARCH64_ONLY(16) NOT_AARCH64(8); +const int StackAlignmentInBytes = 8; // Indicates whether the C calling conventions require that // 32-bit integer argument values are extended to 64 bits. 
@@ -49,24 +37,19 @@ const bool HaveVFP = true; #endif -#if defined(__ARM_PCS_VFP) || defined(AARCH64) +#if defined(__ARM_PCS_VFP) #define __ABI_HARD__ #endif -#if defined(__ARM_ARCH_7A__) || defined(AARCH64) +#if defined(__ARM_ARCH_7A__) #define SUPPORTS_NATIVE_CX8 #endif #define STUBROUTINES_MD_HPP "stubRoutines_arm.hpp" #define INTERP_MASM_MD_HPP "interp_masm_arm.hpp" #define TEMPLATETABLE_MD_HPP "templateTable_arm.hpp" -#ifdef AARCH64 -#define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_64.hpp" -#define AD_MD_HPP "adfiles/ad_arm_64.hpp" -#else #define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_32.hpp" #define AD_MD_HPP "adfiles/ad_arm_32.hpp" -#endif #define C1_LIRGENERATOR_MD_HPP "c1_LIRGenerator_arm.hpp" #ifdef TARGET_COMPILER_gcc --- old/src/hotspot/cpu/arm/globals_arm.hpp 2018-09-17 10:30:04.617881362 -0400 +++ new/src/hotspot/cpu/arm/globals_arm.hpp 2018-09-17 10:30:03.984844388 -0400 @@ -88,13 +88,5 @@ notproduct, \ range, \ constraint, \ - writeable) \ - \ - develop(bool, VerifyInterpreterStackTop, false, \ - "Verify interpreter stack top at every stack expansion (AArch64 only)") \ - \ - develop(bool, ZapHighNonSignificantBits, false, \ - "Zap high non-significant bits of values (AArch64 only)") \ - \ - + writeable) #endif // CPU_ARM_VM_GLOBALS_ARM_HPP --- old/src/hotspot/cpu/arm/icBuffer_arm.cpp 2018-09-17 10:30:06.287978909 -0400 +++ new/src/hotspot/cpu/arm/icBuffer_arm.cpp 2018-09-17 10:30:05.654941935 -0400 @@ -35,7 +35,7 @@ #define __ masm-> int InlineCacheBuffer::ic_stub_code_size() { - return (AARCH64_ONLY(8) NOT_AARCH64(4)) * Assembler::InstructionSize; + return (4 * Assembler::InstructionSize); } void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { --- old/src/hotspot/cpu/arm/icache_arm.cpp 2018-09-17 10:30:07.847069972 -0400 +++ new/src/hotspot/cpu/arm/icache_arm.cpp 2018-09-17 10:30:07.213032939 -0400 @@ -29,49 +29,12 @@ #define __ _masm-> -#ifdef AARCH64 - -static int icache_flush(address addr, int lines, int magic) { - // TODO-AARCH64 Figure out actual cache line size (mrs Xt, CTR_EL0) - - address p = addr; - for (int i = 0; i < lines; i++, p += ICache::line_size) { - __asm__ volatile( - " dc cvau, %[p]" - : - : [p] "r" (p) - : "memory"); - } - - __asm__ volatile( - " dsb ish" - : : : "memory"); - - p = addr; - for (int i = 0; i < lines; i++, p += ICache::line_size) { - __asm__ volatile( - " ic ivau, %[p]" - : - : [p] "r" (p) - : "memory"); - } - - __asm__ volatile( - " dsb ish\n\t" - " isb\n\t" - : : : "memory"); - - return magic; -} - -#else static int icache_flush(address addr, int lines, int magic) { __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); return magic; } -#endif // AARCH64 void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { address start = (address)icache_flush; --- old/src/hotspot/cpu/arm/interp_masm_arm.cpp 2018-09-17 10:30:10.021196957 -0400 +++ new/src/hotspot/cpu/arm/interp_masm_arm.cpp 2018-09-17 10:30:09.174147483 -0400 @@ -54,7 +54,7 @@ } void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -#if defined(ASSERT) && !defined(AARCH64) +#if defined(ASSERT) // Ensure that last_sp is not filled. 
{ Label L; ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); @@ -62,27 +62,15 @@ stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL"); bind(L); } -#endif // ASSERT && !AARCH64 +#endif // ASSERT // Rbcp must be saved/restored since it may change due to GC. save_bcp(); -#ifdef AARCH64 - check_no_cached_stack_top(Rtemp); - save_stack_top(); - check_extended_sp(Rtemp); - cut_sp_before_call(); -#endif // AARCH64 // super call MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions); -#ifdef AARCH64 - // Restore SP to extended SP - restore_sp_after_call(Rtemp); - check_stack_top(); - clear_cached_stack_top(); -#endif // AARCH64 // Restore interpreter specific registers. restore_bcp(); @@ -128,10 +116,8 @@ const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset()); const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset()); const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset()); -#ifndef AARCH64 const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset() + in_ByteSize(wordSize)); -#endif // !AARCH64 Register zero = zero_register(Rtemp); @@ -141,11 +127,7 @@ interp_verify_oop(R0_tos, state, __FILE__, __LINE__); break; -#ifdef AARCH64 - case ltos: ldr(R0_tos, val_addr); break; -#else case ltos: ldr(R1_tos_hi, val_addr_hi); // fall through -#endif // AARCH64 case btos: // fall through case ztos: // fall through case ctos: // fall through @@ -163,9 +145,7 @@ } // Clean up tos value in the thread object str(zero, val_addr); -#ifndef AARCH64 str(zero, val_addr_hi); -#endif // !AARCH64 mov(Rtemp, (int) ilgl); str_32(Rtemp, tos_addr); @@ -220,7 +200,6 @@ ldrb(tmp_reg, Address(Rbcp, bcp_offset)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); } else if (index_size == sizeof(u4)) { - // TODO-AARCH64: consider using unaligned access here ldrb(index, Address(Rbcp, bcp_offset+3)); ldrb(tmp_reg, Address(Rbcp, bcp_offset+2)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); @@ -252,7 +231,6 @@ // convert from field index to ConstantPoolCacheEntry index assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); - // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called logical_shift_left(index, index, 2); } @@ -261,13 +239,8 @@ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); // caution index and bytecode can be the same add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord)); -#ifdef AARCH64 - add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); - ldarb(bytecode, bytecode); -#else ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()))); TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true); -#endif // AARCH64 } // Sets cache. Blows reg_tmp. @@ -365,31 +338,21 @@ ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset())); ldr_u32(supers_cnt, Address(supers_arr, Array::length_offset_in_bytes())); // Load the array length -#ifdef AARCH64 - cbz(supers_cnt, not_subtype); - add(supers_arr, supers_arr, Array::base_offset_in_bytes()); -#else cmp(supers_cnt, 0); // Skip to the start of array elements and prefetch the first super-klass. 
ldr(cur_super, Address(supers_arr, Array::base_offset_in_bytes(), pre_indexed), ne); b(not_subtype, eq); -#endif // AARCH64 bind(loop); -#ifdef AARCH64 - ldr(cur_super, Address(supers_arr, wordSize, post_indexed)); -#endif // AARCH64 cmp(cur_super, Rsuper_klass); b(update_cache, eq); subs(supers_cnt, supers_cnt, 1); -#ifndef AARCH64 ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne); -#endif // !AARCH64 b(loop, ne); @@ -419,33 +382,18 @@ zap_high_non_significant_bits(r); } -#ifdef AARCH64 -void InterpreterMacroAssembler::pop_l(Register r) { - assert(r != Rstack_top, "unpredictable instruction"); - ldr(r, Address(Rstack_top, 2*wordSize, post_indexed)); -} -#else void InterpreterMacroAssembler::pop_l(Register lo, Register hi) { assert_different_registers(lo, hi); assert(lo < hi, "lo must be < hi"); pop(RegisterSet(lo) | RegisterSet(hi)); } -#endif // AARCH64 void InterpreterMacroAssembler::pop_f(FloatRegister fd) { -#ifdef AARCH64 - ldr_s(fd, Address(Rstack_top, wordSize, post_indexed)); -#else fpops(fd); -#endif // AARCH64 } void InterpreterMacroAssembler::pop_d(FloatRegister fd) { -#ifdef AARCH64 - ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed)); -#else fpopd(fd); -#endif // AARCH64 } @@ -458,11 +406,7 @@ case ctos: // fall through case stos: // fall through case itos: pop_i(R0_tos); break; -#ifdef AARCH64 - case ltos: pop_l(R0_tos); break; -#else case ltos: pop_l(R0_tos_lo, R1_tos_hi); break; -#endif // AARCH64 #ifdef __SOFTFP__ case ftos: pop_i(R0_tos); break; case dtos: pop_l(R0_tos_lo, R1_tos_hi); break; @@ -488,36 +432,18 @@ check_stack_top_on_expansion(); } -#ifdef AARCH64 -void InterpreterMacroAssembler::push_l(Register r) { - assert(r != Rstack_top, "unpredictable instruction"); - stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed)); - check_stack_top_on_expansion(); -} -#else void InterpreterMacroAssembler::push_l(Register lo, Register hi) { assert_different_registers(lo, hi); assert(lo < hi, "lo must be < hi"); push(RegisterSet(lo) | RegisterSet(hi)); } -#endif // AARCH64 void InterpreterMacroAssembler::push_f() { -#ifdef AARCH64 - str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed)); - check_stack_top_on_expansion(); -#else fpushs(S0_tos); -#endif // AARCH64 } void InterpreterMacroAssembler::push_d() { -#ifdef AARCH64 - str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed)); - check_stack_top_on_expansion(); -#else fpushd(D0_tos); -#endif // AARCH64 } // Transition state -> vtos. Blows Rtemp. @@ -530,11 +456,7 @@ case ctos: // fall through case stos: // fall through case itos: push_i(R0_tos); break; -#ifdef AARCH64 - case ltos: push_l(R0_tos); break; -#else case ltos: push_l(R0_tos_lo, R1_tos_hi); break; -#endif // AARCH64 #ifdef __SOFTFP__ case ftos: push_i(R0_tos); break; case dtos: push_l(R0_tos_lo, R1_tos_hi); break; @@ -548,7 +470,6 @@ } -#ifndef AARCH64 // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value. 
void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) { @@ -576,7 +497,6 @@ #endif // !__SOFTFP__ && !__ABI_HARD__ } -#endif // !AARCH64 // Helpers for swap and dup @@ -590,20 +510,12 @@ void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -#ifdef AARCH64 - check_no_cached_stack_top(Rtemp); - save_stack_top(); - cut_sp_before_call(); - mov(Rparams, Rstack_top); -#endif // AARCH64 // set sender sp mov(Rsender_sp, SP); -#ifndef AARCH64 // record last_sp str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // !AARCH64 } // Jump to from_interpreted entry of a call unless single stepping is possible @@ -619,19 +531,8 @@ // interp_only_mode if these events CAN be enabled. ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); -#ifdef AARCH64 - { - Label not_interp_only_mode; - - cbz(Rtemp, not_interp_only_mode); - indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp); - - bind(not_interp_only_mode); - } -#else cmp(Rtemp, 0); ldr(PC, Address(method, Method::interpreter_entry_offset()), ne); -#endif // AARCH64 } indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp); @@ -658,12 +559,7 @@ bool verifyoop) { if (VerifyActivationFrameSize) { Label L; -#ifdef AARCH64 - mov(Rtemp, SP); - sub(Rtemp, FP, Rtemp); -#else sub(Rtemp, FP, SP); -#endif // AARCH64 int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize; cmp(Rtemp, min_frame_size); b(L, ge); @@ -692,16 +588,10 @@ if (state == vtos) { indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp); } else { -#ifdef AARCH64 - sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) - - Interpreter::distance_from_dispatch_table(state))); - indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp); -#else // on 32-bit ARM this method is faster than the one above. sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) - Interpreter::distance_from_dispatch_table(state)) * wordSize); indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp); -#endif } } else { assert(table_mode == DispatchNormal, "invalid dispatch table mode"); @@ -897,25 +787,18 @@ // points to word before bottom of monitor block cmp(Rcur, Rbottom); // check if there are no monitors -#ifndef AARCH64 ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object -#endif // !AARCH64 b(no_unlock, eq); bind(loop); -#ifdef AARCH64 - ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes())); -#endif // AARCH64 // check if current entry is used cbnz(Rcur_obj, exception_monitor_is_still_locked); add(Rcur, Rcur, entry_size); // otherwise advance to next entry cmp(Rcur, Rbottom); // check if bottom reached -#ifndef AARCH64 ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object -#endif // !AARCH64 b(loop, ne); // if not at bottom then check this entry } @@ -929,15 +812,9 @@ } // remove activation -#ifdef AARCH64 - ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); - ldp(FP, LR, Address(FP)); - mov(SP, Rtemp); -#else mov(Rtemp, FP); ldmia(FP, RegisterSet(FP) | RegisterSet(LR)); ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize)); -#endif if (ret_addr != LR) { mov(ret_addr, LR); @@ -965,7 +842,7 @@ // // Argument: R1 : Points to BasicObjectLock to be used for locking. // Must be initialized with object to lock. 
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. +// Blows volatile registers R0-R3, Rtemp, LR. Calls VM. void InterpreterMacroAssembler::lock_object(Register Rlock) { assert(Rlock == R1, "the second argument"); @@ -991,15 +868,6 @@ biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case); } -#ifdef AARCH64 - assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); - ldr(Rmark, Robj); - - // Test if object is already locked - assert(markOopDesc::unlocked_value == 1, "adjust this code"); - tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked); - -#else // AARCH64 // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. // That would be acceptable as ether CAS or slow case path is taken in that case. @@ -1013,7 +881,6 @@ tst(Rmark, markOopDesc::unlocked_value); b(already_locked, eq); -#endif // !AARCH64 // Save old object->mark() into BasicLock's displaced header str(Rmark, Address(Rlock, mark_offset)); @@ -1059,19 +926,6 @@ // conditions into a single test: // => ((mark - SP) & (3 - os::pagesize())) == 0 -#ifdef AARCH64 - // Use the single check since the immediate is OK for AARCH64 - sub(R0, Rmark, Rstack_top); - intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); - Assembler::LogicalImmediate imm(mask, false); - ands(R0, R0, imm); - - // For recursive case store 0 into lock record. - // It is harmless to store it unconditionally as lock record contains some garbage - // value in its _displaced_header field by this moment. - str(ZR, Address(Rlock, mark_offset)); - -#else // AARCH64 // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand. // Check independently the low bits and the distance to SP. // -1- test low 2 bits @@ -1082,7 +936,6 @@ // If still 'eq' then recursive locking OK: store 0 into lock record str(R0, Address(Rlock, mark_offset), eq); -#endif // AARCH64 #ifndef PRODUCT if (PrintBiasedLockingStatistics) { @@ -1106,7 +959,7 @@ // // Argument: R1: Points to BasicObjectLock structure for lock // Throw an IllegalMonitorException if object is not locked by current thread -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. +// Blows volatile registers R0-R3, Rtemp, LR. Calls VM. void InterpreterMacroAssembler::unlock_object(Register Rlock) { assert(Rlock == R1, "the second argument"); @@ -1168,7 +1021,7 @@ // Set the method data pointer for the current bcp. -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. +// Blows volatile registers R0-R3, Rtemp, LR. void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { assert(ProfileInterpreter, "must be profiling interpreter"); Label set_mdp; @@ -1265,22 +1118,12 @@ // Decrement the register. Set condition codes. subs(bumped_count, bumped_count, DataLayout::counter_increment); // Avoid overflow. -#ifdef AARCH64 - assert(DataLayout::counter_increment == 1, "required for cinc"); - cinc(bumped_count, bumped_count, pl); -#else add(bumped_count, bumped_count, DataLayout::counter_increment, pl); -#endif // AARCH64 } else { // Increment the register. Set condition codes. adds(bumped_count, bumped_count, DataLayout::counter_increment); // Avoid overflow. 
-#ifdef AARCH64
- assert(DataLayout::counter_increment == 1, "required for cinv");
- cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
-#else
 sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
-#endif // AARCH64
 }
 str(bumped_count, data);
 }
@@ -1328,7 +1171,7 @@
 }
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
 assert(ProfileInterpreter, "must be profiling interpreter");
 assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);
@@ -1542,7 +1385,7 @@
 bind (done);
 }
-// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Sets mdp, blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
 assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);
@@ -1704,9 +1547,6 @@
 void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
-#ifdef AARCH64
- rev_w(r, r);
-#else
 if (VM_Version::supports_rev()) {
 rev(r, r);
 } else {
@@ -1715,7 +1555,6 @@
 andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
 eor(r, rtmp1, AsmOperand(r, ror, 8));
 }
-#endif // AARCH64
 }
@@ -1723,7 +1562,7 @@
 const intx addr = (intx) (address_of_counter + offset);
 assert ((addr & 0x3) == 0, "address of counter should be aligned");
- const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));
+ const intx offset_mask = right_n_bits(12);
 const address base = (address) (addr & ~offset_mask);
 const int offs = (int) (addr & offset_mask);
@@ -1736,14 +1575,7 @@
 if (avoid_overflow) {
 adds_32(val, val, 1);
-#ifdef AARCH64
- Label L;
- b(L, mi);
- str_32(val, Address(addr_base, offs));
- bind(L);
-#else
 str(val, Address(addr_base, offs), pl);
-#endif // AARCH64
 } else {
 add_32(val, val, 1);
 str_32(val, Address(addr_base, offs));
@@ -1823,17 +1655,9 @@
 if (native) {
 // For c++ and template interpreter push both result registers on the
 // stack in native, we don't know the state.
- // On AArch64 result registers are stored into the frame at known locations.
 // See frame::interpreter_frame_result for code that gets the result values from here.
assert(result_lo != noreg, "result registers should be defined"); -#ifdef AARCH64 - assert(result_hi == noreg, "result_hi is not used on AArch64"); - assert(result_fp != fnoreg, "FP result register must be defined"); - - str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); - str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); -#else assert(result_hi != noreg, "result registers should be defined"); #ifdef __ABI_HARD__ @@ -1843,20 +1667,14 @@ #endif // __ABI_HARD__ push(RegisterSet(result_lo) | RegisterSet(result_hi)); -#endif // AARCH64 call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -#ifdef AARCH64 - ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); - ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); -#else pop(RegisterSet(result_lo) | RegisterSet(result_hi)); #ifdef __ABI_HARD__ fldd(result_fp, Address(SP)); add(SP, SP, 2 * wordSize); #endif // __ABI_HARD__ -#endif // AARCH64 } else { // For the template interpreter, the value on tos is the size of the @@ -1932,13 +1750,8 @@ add(scratch, scratch, increment); str_32(scratch, counter_addr); -#ifdef AARCH64 - ldr_u32(scratch2, mask_addr); - ands_w(ZR, scratch, scratch2); -#else ldr(scratch2, mask_addr); andrs(scratch, scratch, scratch2); -#endif // AARCH64 b(*where, cond); } @@ -1959,26 +1772,15 @@ // Save and restore in use caller-saved registers since they will be trashed by call_VM assert(reg1 != noreg, "must specify reg1"); assert(reg2 != noreg, "must specify reg2"); -#ifdef AARCH64 - assert(reg3 != noreg, "must specify reg3"); - stp(reg1, reg2, Address(Rstack_top, -2*wordSize, pre_indexed)); - stp(reg3, ZR, Address(Rstack_top, -2*wordSize, pre_indexed)); -#else assert(reg3 == noreg, "must not specify reg3"); push(RegisterSet(reg1) | RegisterSet(reg2)); -#endif } mov(R1, method); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), R1); if (saveRegs) { -#ifdef AARCH64 - ldp(reg3, ZR, Address(Rstack_top, 2*wordSize, post_indexed)); - ldp(reg1, reg2, Address(Rstack_top, 2*wordSize, post_indexed)); -#else pop(RegisterSet(reg1) | RegisterSet(reg2)); -#endif } ldr(Rcounters, method_counters); --- old/src/hotspot/cpu/arm/interp_masm_arm.hpp 2018-09-17 10:30:11.614290006 -0400 +++ new/src/hotspot/cpu/arm/interp_masm_arm.hpp 2018-09-17 10:30:10.983253149 -0400 @@ -63,48 +63,12 @@ virtual void check_and_handle_earlyret(); // Interpreter-specific registers -#if defined(AARCH64) && defined(ASSERT) - -#define check_stack_top() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__)) -#define check_stack_top_on_expansion() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__), VerifyInterpreterStackTop) -#define check_extended_sp(tmp) _check_extended_sp(tmp, "SP does not match extended SP in frame at " __FILE__ ":" XSTR(__LINE__)) -#define check_no_cached_stack_top(tmp) _check_no_cached_stack_top(tmp, "stack_top is already cached in frame at " __FILE__ ":" XSTR(__LINE__)) - - void _check_stack_top(const char* msg, bool enabled = true) { - if (enabled) { - Label L; - cmp(SP, Rstack_top); - b(L, ls); - stop(msg); - bind(L); - } - } - - void _check_extended_sp(Register tmp, const char* msg) { - Label L; - ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); - cmp(SP, tmp); - b(L, eq); - stop(msg); - bind(L); - } - - void _check_no_cached_stack_top(Register tmp, const char* msg) { - 
Label L; - ldr(tmp, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); - cbz(tmp, L); - stop(msg); - bind(L); - } - -#else inline void check_stack_top() {} inline void check_stack_top_on_expansion() {} inline void check_extended_sp(Register tmp) {} inline void check_no_cached_stack_top(Register tmp) {} -#endif // AARCH64 && ASSERT void save_bcp() { str(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); } void restore_bcp() { ldr(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); } @@ -112,13 +76,6 @@ void restore_method() { ldr(Rmethod, Address(FP, frame::interpreter_frame_method_offset * wordSize)); } void restore_dispatch(); -#ifdef AARCH64 - void save_stack_top() { check_stack_top(); str(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); } - void clear_cached_stack_top() { str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); } - void restore_stack_top() { ldr(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); clear_cached_stack_top(); check_stack_top(); } - void cut_sp_before_call() { align_reg(SP, Rstack_top, StackAlignmentInBytes); } - void restore_sp_after_call(Register tmp) { ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); mov(SP, tmp); } -#endif // Helpers for runtime call arguments/results void get_const(Register reg) { ldr(reg, Address(Rmethod, Method::const_offset())); } @@ -145,21 +102,13 @@ void pop_ptr(Register r); void pop_i(Register r = R0_tos); -#ifdef AARCH64 - void pop_l(Register r = R0_tos); -#else void pop_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi); -#endif void pop_f(FloatRegister fd); void pop_d(FloatRegister fd); void push_ptr(Register r); void push_i(Register r = R0_tos); -#ifdef AARCH64 - void push_l(Register r = R0_tos); -#else void push_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi); -#endif void push_f(); void push_d(); @@ -168,7 +117,6 @@ // Transition state -> vtos. Blows Rtemp. void push(TosState state); -#ifndef AARCH64 // The following methods are overridden to allow overloaded calls to // MacroAssembler::push/pop(Register) // MacroAssembler::push/pop(RegisterSet) @@ -183,7 +131,6 @@ void convert_retval_to_tos(TosState state); // Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions). void convert_tos_to_retval(TosState state); -#endif // JVMTI ForceEarlyReturn support void load_earlyret_value(TosState state); @@ -194,12 +141,8 @@ void empty_expression_stack() { ldr(Rstack_top, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); check_stack_top(); -#ifdef AARCH64 - clear_cached_stack_top(); -#else // NULL last_sp until next java call str(zero_register(Rtemp), Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 } // Helpers for swap and dup --- old/src/hotspot/cpu/arm/interpreterRT_arm.cpp 2018-09-17 10:30:13.191382120 -0400 +++ new/src/hotspot/cpu/arm/interpreterRT_arm.cpp 2018-09-17 10:30:12.561345321 -0400 @@ -44,13 +44,9 @@ _abi_offset = 0; _ireg = is_static() ? 
2 : 1; #ifdef __ABI_HARD__ -#ifdef AARCH64 - _freg = 0; -#else _fp_slot = 0; _single_fpr_slot = 0; #endif -#endif } #ifdef SHARING_FAST_NATIVE_FINGERPRINTS @@ -126,17 +122,6 @@ } void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -#ifdef AARCH64 - if (_ireg < GPR_PARAMS) { - Register dst = as_Register(_ireg); - __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); - _ireg++; - } else { - __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); - __ str(Rtemp, Address(SP, _abi_offset * wordSize)); - _abi_offset++; - } -#else if (_ireg <= 2) { #if (ALIGN_WIDE_ARGUMENTS == 1) if ((_ireg & 1) != 0) { @@ -170,24 +155,9 @@ _abi_offset += 2; _ireg = 4; } -#endif // AARCH64 } void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -#ifdef AARCH64 - __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); - __ cmp(Rtemp, 0); - __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset())); - if (_ireg < GPR_PARAMS) { - Register dst = as_Register(_ireg); - __ csel(dst, ZR, Rtemp, eq); - _ireg++; - } else { - __ csel(Rtemp, ZR, Rtemp, eq); - __ str(Rtemp, Address(SP, _abi_offset * wordSize)); - _abi_offset++; - } -#else if (_ireg < 4) { Register dst = as_Register(_ireg); __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); @@ -201,7 +171,6 @@ __ str(Rtemp, Address(SP, _abi_offset * wordSize)); _abi_offset++; } -#endif // AARCH64 } #ifndef __ABI_HARD__ @@ -220,17 +189,6 @@ #else #ifndef __SOFTFP__ void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -#ifdef AARCH64 - if (_freg < FPR_PARAMS) { - FloatRegister dst = as_FloatRegister(_freg); - __ ldr_s(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); - _freg++; - } else { - __ ldr_u32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); - __ str_32(Rtemp, Address(SP, _abi_offset * wordSize)); - _abi_offset++; - } -#else if((_fp_slot < 16) || (_single_fpr_slot & 1)) { if ((_single_fpr_slot & 1) == 0) { _single_fpr_slot = _fp_slot; @@ -243,21 +201,9 @@ __ str(Rtemp, Address(SP, _abi_offset * wordSize)); _abi_offset++; } -#endif // AARCH64 } void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -#ifdef AARCH64 - if (_freg < FPR_PARAMS) { - FloatRegister dst = as_FloatRegister(_freg); - __ ldr_d(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); - _freg++; - } else { - __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); - __ str(Rtemp, Address(SP, _abi_offset * wordSize)); - _abi_offset++; - } -#else if(_fp_slot <= 14) { __ fldd(as_FloatRegister(_fp_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); _fp_slot += 2; @@ -269,7 +215,6 @@ _abi_offset += 2; _single_fpr_slot = 16; } -#endif // AARCH64 } #endif // __SOFTFP__ #endif // __ABI_HARD__ @@ -281,14 +226,10 @@ address result_handler = Interpreter::result_handler(result_type); -#ifdef AARCH64 - __ mov_slow(R0, (address)result_handler); -#else // Check that result handlers are not real handler on ARM (0 or -1). // This ensures the signature handlers do not need symbolic information. 
assert((result_handler == NULL)||(result_handler==(address)0xffffffff),""); __ mov_slow(R0, (intptr_t)result_handler); -#endif __ ret(); } @@ -339,9 +280,7 @@ intptr_t* _toGP; int _last_gp; int _last_fp; -#ifndef AARCH64 int _last_single_fp; -#endif // !AARCH64 virtual void pass_int() { if(_last_gp < GPR_PARAMS) { @@ -353,13 +292,6 @@ } virtual void pass_long() { -#ifdef AARCH64 - if(_last_gp < GPR_PARAMS) { - _toGP[_last_gp++] = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1)); - } else { - *_to++ = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1)); - } -#else assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); if (_last_gp <= 2) { if(_last_gp & 1) _last_gp++; @@ -375,7 +307,6 @@ _to += 2; _last_gp = 4; } -#endif // AARCH64 _from -= 2*Interpreter::stackElementSize; } @@ -390,13 +321,6 @@ } virtual void pass_float() { -#ifdef AARCH64 - if(_last_fp < FPR_PARAMS) { - _toFP[_last_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - } else { - *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - } -#else if((_last_fp < 16) || (_last_single_fp & 1)) { if ((_last_single_fp & 1) == 0) { _last_single_fp = _last_fp; @@ -407,18 +331,10 @@ } else { *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); } -#endif // AARCH64 _from -= Interpreter::stackElementSize; } virtual void pass_double() { -#ifdef AARCH64 - if(_last_fp < FPR_PARAMS) { - _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); - } else { - *_to++ = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); - } -#else assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); if(_last_fp <= 14) { _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); @@ -432,7 +348,6 @@ _to += 2; _last_single_fp = 16; } -#endif // AARCH64 _from -= 2*Interpreter::stackElementSize; } @@ -446,12 +361,10 @@ #ifdef __ABI_HARD__ _toGP = to; _toFP = _toGP + GPR_PARAMS; - _to = _toFP + AARCH64_ONLY(FPR_PARAMS) NOT_AARCH64(8*2); + _to = _toFP + (8*2); _last_gp = (is_static() ? 2 : 1); _last_fp = 0; -#ifndef AARCH64 _last_single_fp = 0; -#endif // !AARCH64 #else _to = to + (is_static() ? 2 : 1); #endif // __ABI_HARD__ --- old/src/hotspot/cpu/arm/interpreterRT_arm.hpp 2018-09-17 10:30:15.653525926 -0400 +++ new/src/hotspot/cpu/arm/interpreterRT_arm.hpp 2018-09-17 10:30:14.123436559 -0400 @@ -34,13 +34,9 @@ int _ireg; #ifdef __ABI_HARD__ -#ifdef AARCH64 - int _freg; -#else int _fp_slot; // number of FPR's with arguments loaded int _single_fpr_slot; #endif -#endif void move(int from_offset, int to_offset); void box(int from_offset, int to_offset); @@ -60,10 +56,8 @@ void generate(uint64_t fingerprint); }; -#ifndef AARCH64 // ARM provides a normalized fingerprint for native calls (to increase // sharing). 
See normalize_fast_native_fingerprint #define SHARING_FAST_NATIVE_FINGERPRINTS -#endif #endif // CPU_ARM_VM_INTERPRETERRT_ARM_HPP --- old/src/hotspot/cpu/arm/jniFastGetField_arm.cpp 2018-09-17 10:30:17.240618626 -0400 +++ new/src/hotspot/cpu/arm/jniFastGetField_arm.cpp 2018-09-17 10:30:16.590580660 -0400 @@ -78,26 +78,19 @@ // R1 - object handle // R2 - jfieldID - const Register Rsafepoint_counter_addr = AARCH64_ONLY(R4) NOT_AARCH64(R3); - const Register Robj = AARCH64_ONLY(R5) NOT_AARCH64(R1); - const Register Rres = AARCH64_ONLY(R6) NOT_AARCH64(R0); -#ifndef AARCH64 + const Register Rsafepoint_counter_addr = R3; + const Register Robj = R1; + const Register Rres = R0; const Register Rres_hi = R1; -#endif // !AARCH64 const Register Rsafept_cnt = Rtemp; const Register Rsafept_cnt2 = Rsafepoint_counter_addr; - const Register Rtmp1 = AARCH64_ONLY(R7) NOT_AARCH64(R3); // same as Rsafepoint_counter_addr on 32-bit ARM - const Register Rtmp2 = AARCH64_ONLY(R8) NOT_AARCH64(R2); // same as jfieldID on 32-bit ARM + const Register Rtmp1 = R3; // same as Rsafepoint_counter_addr + const Register Rtmp2 = R2; // same as jfieldID -#ifdef AARCH64 - assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, Rtmp1, Rtmp2, R0, R1, R2, LR); - assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, R0, R1, R2, LR); -#else assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, LR); assert_different_registers(Rsafept_cnt, R1, R2, Rtmp1, LR); assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Rres, Rres_hi, Rtmp2, LR); assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, Rres_hi, LR); -#endif // AARCH64 address fast_entry; @@ -112,20 +105,12 @@ Label slow_case; __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr); -#ifndef AARCH64 __ push(RegisterSet(R0, R3)); // save incoming arguments for slow case -#endif // !AARCH64 __ ldr_s32(Rsafept_cnt, Address(Rsafepoint_counter_addr)); __ tbnz(Rsafept_cnt, 0, slow_case); -#ifdef AARCH64 - // If mask changes we need to ensure that the inverse is still encodable as an immediate - STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); - __ andr(R1, R1, ~JNIHandles::weak_tag_mask); -#else __ bic(R1, R1, JNIHandles::weak_tag_mask); -#endif if (os::is_MP()) { // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier @@ -135,10 +120,6 @@ __ ldr(Robj, Address(R1)); } -#ifdef AARCH64 - __ add(Robj, Robj, AsmOperand(R2, lsr, 2)); - Address field_addr = Address(Robj); -#else Address field_addr; if (type != T_BOOLEAN && type != T_INT @@ -152,7 +133,6 @@ } else { field_addr = Address(Robj, R2, lsr, 2); } -#endif // AARCH64 assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); speculative_load_pclist[count] = __ pc(); @@ -179,12 +159,8 @@ #ifndef __ABI_HARD__ case T_DOUBLE: #endif -#ifdef AARCH64 - __ ldr(Rres, field_addr); -#else // Safe to use ldrd since long and double fields are 8-byte aligned __ ldrd(Rres, field_addr); -#endif // AARCH64 break; #ifdef __ABI_HARD__ case T_FLOAT: @@ -200,18 +176,16 @@ if(os::is_MP()) { // Address dependency restricts memory access ordering. 
It's cheaper than explicit LoadLoad barrier -#if defined(__ABI_HARD__) && !defined(AARCH64) +#if defined(__ABI_HARD__) if (type == T_FLOAT || type == T_DOUBLE) { __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr); __ fmrrd(Rres, Rres_hi, D0); __ eor(Rtmp2, Rres, Rres); __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2)); } else -#endif // __ABI_HARD__ && !AARCH64 +#endif // __ABI_HARD__ { -#ifndef AARCH64 __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr); -#endif // !AARCH64 __ eor(Rtmp2, Rres, Rres); __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2)); } @@ -219,22 +193,14 @@ __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr)); } __ cmp(Rsafept_cnt2, Rsafept_cnt); -#ifdef AARCH64 - __ b(slow_case, ne); - __ mov(R0, Rres); - __ ret(); -#else // discards saved R0 R1 R2 R3 __ add(SP, SP, 4 * wordSize, eq); __ bx(LR, eq); -#endif // AARCH64 slowcase_entry_pclist[count++] = __ pc(); __ bind(slow_case); -#ifndef AARCH64 __ pop(RegisterSet(R0, R3)); -#endif // !AARCH64 // thumb mode switch handled by MacroAssembler::jump if needed __ jump(slow_case_addr, relocInfo::none, Rtemp); --- old/src/hotspot/cpu/arm/jniTypes_arm.hpp 2018-09-17 10:30:18.822711033 -0400 +++ new/src/hotspot/cpu/arm/jniTypes_arm.hpp 2018-09-17 10:30:18.182673650 -0400 @@ -44,12 +44,10 @@ private: -#ifndef AARCH64 // 32bit Helper routines. static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; *(jint *)(to ) = from[0]; } static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } -#endif public: // Ints are stored in native format in one JavaCallArgument slot at *to. @@ -57,18 +55,11 @@ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } -#ifdef AARCH64 - // Longs are stored in native format in one JavaCallArgument slot at *(to+1). - static inline void put_long(jlong from, intptr_t *to) { *(jlong *)(to + 1 + 0) = from; } - static inline void put_long(jlong from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = from; pos += 2; } - static inline void put_long(jlong *from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = *from; pos += 2; } -#else // Longs are stored in big-endian word format in two JavaCallArgument slots at *to. // The high half is in *to and the low half in *(to+1). static inline void put_long(jlong from, intptr_t *to) { put_int2r((jint *)&from, to); } static inline void put_long(jlong from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } static inline void put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } -#endif // Oops are stored in native format in one JavaCallArgument slot at *to. static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } @@ -80,18 +71,11 @@ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } -#ifdef AARCH64 - // Doubles are stored in native word format in one JavaCallArgument slot at *(to+1). 
- static inline void put_double(jdouble from, intptr_t *to) { *(jdouble *)(to + 1 + 0) = from; } - static inline void put_double(jdouble from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = from; pos += 2; } - static inline void put_double(jdouble *from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = *from; pos += 2; } -#else // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to. // The high half is in *to and the low half in *(to+1). static inline void put_double(jdouble from, intptr_t *to) { put_int2r((jint *)&from, to); } static inline void put_double(jdouble from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } static inline void put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } -#endif }; --- old/src/hotspot/cpu/arm/macroAssembler_arm.cpp 2018-09-17 10:30:20.390802621 -0400 +++ new/src/hotspot/cpu/arm/macroAssembler_arm.cpp 2018-09-17 10:30:19.755765530 -0400 @@ -97,19 +97,6 @@ } -#ifdef AARCH64 -// Note: ARM32 version is OS dependent -void MacroAssembler::breakpoint(AsmCondition cond) { - if (cond == al) { - brk(); - } else { - Label L; - b(L, inverse(cond)); - brk(); - bind(L); - } -} -#endif // AARCH64 // virtual method calling @@ -210,9 +197,6 @@ Label* L_success, Label* L_failure, bool set_cond_codes) { -#ifdef AARCH64 - NOT_IMPLEMENTED(); -#else // Note: if used by code that expects a register to be 0 on success, // this register must be temp_reg and set_cond_codes must be true @@ -313,7 +297,6 @@ } bind(L_fallthrough); -#endif } // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same. @@ -342,14 +325,9 @@ } else { _fp_saved = false; } - if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM -#ifdef AARCH64 - pc_offset = mov_pc_to(tmp); - str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset())); -#else + if (save_last_java_pc) { str(PC, Address(Rthread, JavaThread::last_Java_pc_offset())); pc_offset = offset() + VM_Version::stored_pc_adjustment(); -#endif _pc_saved = true; } else { _pc_saved = false; @@ -369,16 +347,7 @@ if (last_java_sp == noreg) { last_java_sp = SP; // always saved } -#ifdef AARCH64 - if (last_java_sp == SP) { - mov(tmp, SP); - str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); - } else { - str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); - } -#else str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); -#endif return pc_offset; // for oopmaps } @@ -401,19 +370,15 @@ assert(number_of_arguments >= 0, "cannot have negative number of arguments"); assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); -#ifndef AARCH64 // Safer to save R9 here since callers may have been written // assuming R9 survives. This is suboptimal but is not worth // optimizing for the few platforms where R9 is scratched. push(RegisterSet(R4) | R9ifScratched); mov(R4, SP); bic(SP, SP, StackAlignmentInBytes - 1); -#endif // AARCH64 call(entry_point, relocInfo::runtime_call_type); -#ifndef AARCH64 mov(SP, R4); pop(RegisterSet(R4) | R9ifScratched); -#endif // AARCH64 } @@ -426,11 +391,6 @@ set_last_Java_frame(SP, FP, true, tmp); -#ifdef ASSERT - AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); -#endif // ASSERT - -#ifndef AARCH64 #if R9_IS_SCRATCHED // Safer to save R9 here since callers may have been written // assuming R9 survives. 
This is suboptimal but is not worth @@ -446,17 +406,14 @@ #else bic(SP, SP, StackAlignmentInBytes - 1); #endif // R9_IS_SCRATCHED -#endif mov(R0, Rthread); call(entry_point, relocInfo::runtime_call_type); -#ifndef AARCH64 #if R9_IS_SCRATCHED ldr(R9, Address(SP, 0)); #endif ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); -#endif reset_last_Java_frame(tmp); @@ -467,17 +424,9 @@ if (check_exceptions) { // check for pending exceptions ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); -#ifdef AARCH64 - Label L; - cbz(tmp, L); - mov_pc_to(Rexception_pc); - b(StubRoutines::forward_exception_entry()); - bind(L); -#else cmp(tmp, 0); mov(Rexception_pc, PC, ne); b(StubRoutines::forward_exception_entry(), ne); -#endif // AARCH64 } // get oop result if there is one and reset the value in the thread @@ -608,32 +557,6 @@ } void MacroAssembler::add_slow(Register rd, Register rn, int c) { -#ifdef AARCH64 - if (c == 0) { - if (rd != rn) { - mov(rd, rn); - } - return; - } - if (c < 0) { - sub_slow(rd, rn, -c); - return; - } - if (c > right_n_bits(24)) { - guarantee(rd != rn, "no large add_slow with only one register"); - mov_slow(rd, c); - add(rd, rn, rd); - } else { - int lo = c & right_n_bits(12); - int hi = (c >> 12) & right_n_bits(12); - if (lo != 0) { - add(rd, rn, lo, lsl0); - } - if (hi != 0) { - add(rd, (lo == 0) ? rn : rd, hi, lsl12); - } - } -#else // This function is used in compiler for handling large frame offsets if ((c < 0) && (((-c) & ~0x3fc) == 0)) { return sub(rd, rn, (-c)); @@ -650,30 +573,9 @@ assert(c == 0, ""); mov(rd, rn); // need to generate at least one move! } -#endif // AARCH64 } void MacroAssembler::sub_slow(Register rd, Register rn, int c) { -#ifdef AARCH64 - if (c <= 0) { - add_slow(rd, rn, -c); - return; - } - if (c > right_n_bits(24)) { - guarantee(rd != rn, "no large sub_slow with only one register"); - mov_slow(rd, c); - sub(rd, rn, rd); - } else { - int lo = c & right_n_bits(12); - int hi = (c >> 12) & right_n_bits(12); - if (lo != 0) { - sub(rd, rn, lo, lsl0); - } - if (hi != 0) { - sub(rd, (lo == 0) ? rn : rd, hi, lsl12); - } - } -#else // This function is used in compiler for handling large frame offsets if ((c < 0) && (((-c) & ~0x3fc) == 0)) { return add(rd, rn, (-c)); @@ -690,7 +592,6 @@ assert(c == 0, ""); mov(rd, rn); // need to generate at least one move! } -#endif // AARCH64 } void MacroAssembler::mov_slow(Register rd, address addr) { @@ -702,99 +603,6 @@ mov_slow(rd, (intptr_t)str); } -#ifdef AARCH64 - -// Common code for mov_slow and instr_count_for_mov_slow. -// Returns number of instructions of mov_slow pattern, -// generating it if non-null MacroAssembler is given. -int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) { - // This code pattern is matched in NativeIntruction::is_mov_slow. - // Update it at modifications. 
- - const intx mask = right_n_bits(16); - // 1 movz instruction - for (int base_shift = 0; base_shift < 64; base_shift += 16) { - if ((c & ~(mask << base_shift)) == 0) { - if (masm != NULL) { - masm->movz(rd, ((uintx)c) >> base_shift, base_shift); - } - return 1; - } - } - // 1 movn instruction - for (int base_shift = 0; base_shift < 64; base_shift += 16) { - if (((~c) & ~(mask << base_shift)) == 0) { - if (masm != NULL) { - masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); - } - return 1; - } - } - // 1 orr instruction - { - LogicalImmediate imm(c, false); - if (imm.is_encoded()) { - if (masm != NULL) { - masm->orr(rd, ZR, imm); - } - return 1; - } - } - // 1 movz/movn + up to 3 movk instructions - int zeroes = 0; - int ones = 0; - for (int base_shift = 0; base_shift < 64; base_shift += 16) { - int part = (c >> base_shift) & mask; - if (part == 0) { - ++zeroes; - } else if (part == mask) { - ++ones; - } - } - int def_bits = 0; - if (ones > zeroes) { - def_bits = mask; - } - int inst_count = 0; - for (int base_shift = 0; base_shift < 64; base_shift += 16) { - int part = (c >> base_shift) & mask; - if (part != def_bits) { - if (masm != NULL) { - if (inst_count > 0) { - masm->movk(rd, part, base_shift); - } else { - if (def_bits == 0) { - masm->movz(rd, part, base_shift); - } else { - masm->movn(rd, ~part & mask, base_shift); - } - } - } - inst_count++; - } - } - assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); - return inst_count; -} - -void MacroAssembler::mov_slow(Register rd, intptr_t c) { -#ifdef ASSERT - int off = offset(); -#endif - (void) mov_slow_helper(rd, c, this); - assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); -} - -// Counts instructions generated by mov_slow(rd, c). -int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { - return mov_slow_helper(noreg, c, NULL); -} - -int MacroAssembler::instr_count_for_mov_slow(address c) { - return mov_slow_helper(noreg, (intptr_t)c, NULL); -} - -#else void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { if (AsmOperand::is_rotated_imm(c)) { @@ -829,25 +637,13 @@ } } -#endif // AARCH64 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, -#ifdef AARCH64 - bool patchable -#else AsmCondition cond -#endif ) { if (o == NULL) { -#ifdef AARCH64 - if (patchable) { - nop(); - } - mov(rd, ZR); -#else mov(rd, 0, cond); -#endif return; } @@ -856,12 +652,6 @@ } relocate(oop_Relocation::spec(oop_index)); -#ifdef AARCH64 - if (patchable) { - nop(); - } - ldr(rd, pc()); -#else if (VM_Version::supports_movw()) { movw(rd, 0, cond); movt(rd, 0, cond); @@ -870,16 +660,10 @@ // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 
nop(); } -#endif } -void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { +void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index) { if (o == NULL) { -#ifdef AARCH64 - if (patchable) { - nop(); - } -#endif mov(rd, 0); return; } @@ -889,18 +673,6 @@ } relocate(metadata_Relocation::spec(metadata_index)); -#ifdef AARCH64 - if (patchable) { - nop(); - } -#ifdef COMPILER2 - if (!patchable && VM_Version::prefer_moves_over_load_literal()) { - mov_slow(rd, (address)o); - return; - } -#endif - ldr(rd, pc()); -#else if (VM_Version::supports_movw()) { movw(rd, ((int)o) & 0xffff); movt(rd, (unsigned int)o >> 16); @@ -909,10 +681,9 @@ // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). nop(); } -#endif // AARCH64 } -void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { +void MacroAssembler::mov_float(FloatRegister fd, jfloat c, AsmCondition cond) { Label skip_constant; union { jfloat f; @@ -920,23 +691,13 @@ } accessor; accessor.f = c; -#ifdef AARCH64 - // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow - Label L; - ldr_s(fd, target(L)); - b(skip_constant); - bind(L); - emit_int32(accessor.i); - bind(skip_constant); -#else flds(fd, Address(PC), cond); b(skip_constant); emit_int32(accessor.i); bind(skip_constant); -#endif // AARCH64 } -void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { +void MacroAssembler::mov_double(FloatRegister fd, jdouble c, AsmCondition cond) { Label skip_constant; union { jdouble d; @@ -944,55 +705,21 @@ } accessor; accessor.d = c; -#ifdef AARCH64 - // TODO-AARCH64 - try to optimize loading of double constants with fmov - Label L; - ldr_d(fd, target(L)); - b(skip_constant); - align(wordSize); - bind(L); - emit_int32(accessor.i[0]); - emit_int32(accessor.i[1]); - bind(skip_constant); -#else fldd(fd, Address(PC), cond); b(skip_constant); emit_int32(accessor.i[0]); emit_int32(accessor.i[1]); bind(skip_constant); -#endif // AARCH64 } void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) { intptr_t addr = (intptr_t) address_of_global; -#ifdef AARCH64 - assert((addr & 0x3) == 0, "address should be aligned"); - - // FIXME: TODO - if (false && page_reachable_from_cache(address_of_global)) { - assert(false,"TODO: relocate"); - //relocate(); - adrp(reg, address_of_global); - ldrsw(reg, Address(reg, addr & 0xfff)); - } else { - mov_slow(reg, addr & ~0x3fff); - ldrsw(reg, Address(reg, addr & 0x3fff)); - } -#else mov_slow(reg, addr & ~0xfff); ldr(reg, Address(reg, addr & 0xfff)); -#endif } void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) { -#ifdef AARCH64 - intptr_t addr = (intptr_t) address_of_global; - assert ((addr & 0x7) == 0, "address should be aligned"); - mov_slow(reg, addr & ~0x7fff); - ldr(reg, Address(reg, addr & 0x7fff)); -#else ldr_global_s32(reg, address_of_global); -#endif } void MacroAssembler::ldrb_global(Register reg, address address_of_global) { @@ -1002,14 +729,6 @@ } void MacroAssembler::zero_extend(Register rd, Register rn, int bits) { -#ifdef AARCH64 - switch (bits) { - case 8: uxtb(rd, rn); break; - case 16: uxth(rd, rn); break; - case 32: mov_w(rd, rn); break; - default: ShouldNotReachHere(); - } -#else if (bits <= 8) { andr(rd, rn, (1 << bits) - 1); } else if (bits >= 24) { @@ -1018,24 +737,13 @@ mov(rd, AsmOperand(rn, lsl, 32 - bits)); mov(rd, AsmOperand(rd, 
lsr, 32 - bits)); } -#endif } void MacroAssembler::sign_extend(Register rd, Register rn, int bits) { -#ifdef AARCH64 - switch (bits) { - case 8: sxtb(rd, rn); break; - case 16: sxth(rd, rn); break; - case 32: sxtw(rd, rn); break; - default: ShouldNotReachHere(); - } -#else mov(rd, AsmOperand(rn, lsl, 32 - bits)); mov(rd, AsmOperand(rd, asr, 32 - bits)); -#endif } -#ifndef AARCH64 void MacroAssembler::long_move(Register rd_lo, Register rd_hi, Register rn_lo, Register rn_hi, @@ -1129,7 +837,6 @@ } } } -#endif // !AARCH64 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { // This code pattern is matched in NativeIntruction::skip_verify_oop. @@ -1231,9 +938,6 @@ void MacroAssembler::null_check(Register reg, Register tmp, int offset) { if (needs_explicit_null_check(offset)) { -#ifdef AARCH64 - ldr(ZR, Address(reg)); -#else assert_different_registers(reg, tmp); if (tmp == noreg) { tmp = Rtemp; @@ -1244,7 +948,6 @@ // XXX: could we mark the code buffer as not compatible with C2 ? } ldr(tmp, Address(reg)); -#endif } } @@ -1267,7 +970,7 @@ assert_different_registers(obj, obj_end, top_addr, heap_end); } - bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance + bool load_const = VM_Version::supports_movw(); if (load_const) { mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); } else { @@ -1277,11 +980,7 @@ Label retry; bind(retry); -#ifdef AARCH64 - ldxr(obj, top_addr); -#else ldr(obj, Address(top_addr)); -#endif // AARCH64 ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); add_rc(obj_end, obj, size_expression); @@ -1292,13 +991,8 @@ cmp(obj_end, heap_end); b(slow_case, hi); -#ifdef AARCH64 - stxr(heap_end/*scratched*/, obj_end, top_addr); - cbnz_w(heap_end, retry); -#else atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); b(retry, ne); -#endif // AARCH64 } // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. @@ -1320,50 +1014,14 @@ Label loop; const Register ptr = start; -#ifdef AARCH64 - // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x - const Register size = tmp; - Label remaining, done; - - sub(size, end, start); - -#ifdef ASSERT - { Label L; - tst(size, wordSize - 1); - b(L, eq); - stop("size is not a multiple of wordSize"); - bind(L); - } -#endif // ASSERT - - subs(size, size, wordSize); - b(remaining, le); - - // Zero by 2 words per iteration. 
- bind(loop); - subs(size, size, 2*wordSize); - stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); - b(loop, gt); - - bind(remaining); - b(done, ne); - str(ZR, Address(ptr)); - bind(done); -#else mov(tmp, 0); bind(loop); cmp(ptr, end); str(tmp, Address(ptr, wordSize, post_indexed), lo); b(loop, lo); -#endif // AARCH64 } void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { -#ifdef AARCH64 - ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); - add_rc(tmp, tmp, size_in_bytes); - str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); -#else // Bump total bytes allocated by this thread Label done; @@ -1401,7 +1059,6 @@ // Unborrow the Rthread sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset())); -#endif // AARCH64 } void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { @@ -1411,16 +1068,9 @@ sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); strb(R0, Address(tmp)); -#ifdef AARCH64 - for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { - sub(tmp, tmp, page_size); - strb(R0, Address(tmp)); - } -#else for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { strb(R0, Address(tmp, -0xff0, pre_indexed)); } -#endif // AARCH64 } } @@ -1430,16 +1080,9 @@ mov(tmp, SP); add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); -#ifdef AARCH64 - sub(tmp, tmp, Rsize); - bind(loop); - subs(Rsize, Rsize, os::vm_page_size()); - strb(ZR, Address(tmp, Rsize)); -#else bind(loop); subs(Rsize, Rsize, 0xff0); strb(R0, Address(tmp, -0xff0, pre_indexed)); -#endif // AARCH64 b(loop, hi); } } @@ -1462,24 +1105,10 @@ ldr_literal(R0, Lmsg); // message mov(R1, SP); // register save area -#ifdef AARCH64 - ldr_literal(Rtemp, Ldebug); - br(Rtemp); -#else ldr_literal(PC, Ldebug); // call MacroAssembler::debug -#endif // AARCH64 -#if defined(COMPILER2) && defined(AARCH64) - int off = offset(); -#endif bind_literal(Lmsg); bind_literal(Ldebug); -#if defined(COMPILER2) && defined(AARCH64) - if (offset() - off == 2 * wordSize) { - // no padding, so insert nop for worst-case sizing - nop(); - } -#endif } void MacroAssembler::warn(const char* msg) { @@ -1495,12 +1124,6 @@ int push_size = save_caller_save_registers(); -#ifdef AARCH64 - // TODO-AARCH64 - get rid of extra debug parameters - mov(R1, LR); - mov(R2, FP); - add(R3, SP, push_size); -#endif ldr_literal(R0, Lmsg); // message ldr_literal(LR, Lwarn); // call warning @@ -1519,42 +1142,16 @@ int MacroAssembler::save_all_registers() { // This code pattern is matched in NativeIntruction::is_save_all_registers. // Update it at modifications. 
-#ifdef AARCH64 - const Register tmp = Rtemp; - raw_push(R30, ZR); - for (int i = 28; i >= 0; i -= 2) { - raw_push(as_Register(i), as_Register(i+1)); - } - mov_pc_to(tmp); - str(tmp, Address(SP, 31*wordSize)); - ldr(tmp, Address(SP, tmp->encoding()*wordSize)); - return 32*wordSize; -#else push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); return 15*wordSize; -#endif // AARCH64 } void MacroAssembler::restore_all_registers() { -#ifdef AARCH64 - for (int i = 0; i <= 28; i += 2) { - raw_pop(as_Register(i), as_Register(i+1)); - } - raw_pop(R30, ZR); -#else pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers add(SP, SP, wordSize); // discard saved PC -#endif // AARCH64 } int MacroAssembler::save_caller_save_registers() { -#ifdef AARCH64 - for (int i = 0; i <= 16; i += 2) { - raw_push(as_Register(i), as_Register(i+1)); - } - raw_push(R18, LR); - return 20*wordSize; -#else #if R9_IS_SCRATCHED // Save also R10 to preserve alignment push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); @@ -1563,22 +1160,14 @@ push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); return 6*wordSize; #endif -#endif // AARCH64 } void MacroAssembler::restore_caller_save_registers() { -#ifdef AARCH64 - raw_pop(R18, LR); - for (int i = 16; i >= 0; i -= 2) { - raw_pop(as_Register(i), as_Register(i+1)); - } -#else #if R9_IS_SCRATCHED pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); #else pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); #endif -#endif // AARCH64 } void MacroAssembler::debug(const char* msg, const intx* registers) { @@ -1592,23 +1181,14 @@ BytecodeCounter::print(); } if (os::message_box(msg, "Execution stopped, print registers?")) { -#ifdef AARCH64 - // saved registers: R0-R30, PC - const int nregs = 32; -#else // saved registers: R0-R12, LR, PC const int nregs = 15; const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; -#endif // AARCH64 - for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { - tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); + for (int i = 0; i < nregs; i++) { + tty->print_cr("%s = " INTPTR_FORMAT, regs[i]->name(), registers[i]); } -#ifdef AARCH64 - tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); -#endif // AARCH64 - // derive original SP value from the address of register save area tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); } @@ -1652,24 +1232,6 @@ } } -#ifdef AARCH64 - -// Serializes memory. -// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM -void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { - if (!os::is_MP()) return; - - // TODO-AARCH64 investigate dsb vs dmb effects - if (order_constraint == StoreStore) { - dmb(DMB_st); - } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { - dmb(DMB_ld); - } else { - dmb(DMB_all); - } -} - -#else // Serializes memory. Potentially blows flags and reg. // tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions) @@ -1700,7 +1262,6 @@ } } -#endif // AARCH64 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" // on failure, so fall-through can only mean success. @@ -1723,36 +1284,6 @@ // reordering we must issue a StoreStore or Release barrier before // the CAS store. 
-#ifdef AARCH64 - - Register Rscratch = tmp; - Register Roop = base; - Register mark = oldval; - Register Rbox = newval; - Label loop; - - assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); - - // Instead of StoreStore here, we use store-release-exclusive below - - bind(loop); - - ldaxr(tmp, base); // acquire - cmp(tmp, oldval); - b(slow_case, ne); - stlxr(tmp, newval, base); // release - if (one_shot) { - cmp_w(tmp, 0); - } else { - cbnz_w(tmp, loop); - fallthrough_is_success = true; - } - - // MemBarAcquireLock would normally go here, but - // we already do ldaxr+stlxr above, which has - // Sequential Consistency - -#else membar(MacroAssembler::StoreStore, noreg); if (one_shot) { @@ -1770,7 +1301,6 @@ // the load and store in the CAS sequence, so play it safe and // do a full fence. membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); -#endif if (!fallthrough_is_success && !allow_fallthrough_on_failure) { b(slow_case, ne); } @@ -1785,24 +1315,6 @@ assert_different_registers(oldval,newval,base,tmp); -#ifdef AARCH64 - Label loop; - - assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); - - bind(loop); - ldxr(tmp, base); - cmp(tmp, oldval); - b(slow_case, ne); - // MemBarReleaseLock barrier - stlxr(tmp, newval, base); - if (one_shot) { - cmp_w(tmp, 0); - } else { - cbnz_w(tmp, loop); - fallthrough_is_success = true; - } -#else // MemBarReleaseLock barrier // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, // but that doesn't prevent a load or store from floating down between @@ -1818,7 +1330,6 @@ } else { atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); } -#endif if (!fallthrough_is_success && !allow_fallthrough_on_failure) { b(slow_case, ne); } @@ -1843,21 +1354,6 @@ b(done, inverse(cond)); } -#ifdef AARCH64 - raw_push(R0, R1); - raw_push(R2, ZR); - - ldr_literal(R0, counter_addr_literal); - - bind(retry); - ldxr_w(R1, R0); - add_w(R1, R1, 1); - stxr_w(R2, R1, R0); - cbnz_w(R2, retry); - - raw_pop(R2, ZR); - raw_pop(R0, R1); -#else push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); ldr_literal(R0, counter_addr_literal); @@ -1872,7 +1368,6 @@ msr(CPSR_fsxc, Rtemp); pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); -#endif // AARCH64 b(done); bind_literal(counter_addr_literal); @@ -1958,11 +1453,7 @@ orr(tmp_reg, tmp_reg, Rthread); eor(tmp_reg, tmp_reg, swap_reg); -#ifdef AARCH64 - ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); -#else bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); -#endif // AARCH64 #ifndef PRODUCT if (counters != NULL) { @@ -2012,19 +1503,12 @@ // Note that we know the owner is not ourself. Hence, success can // only happen when the owner bits is 0 -#ifdef AARCH64 - // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has - // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
- mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); - andr(swap_reg, swap_reg, tmp2); -#else // until the assembler can be made smarter, we need to make some assumptions about the values // so we can optimize this: assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) -#endif // AARCH64 orr(tmp_reg, swap_reg, Rthread); // new mark @@ -2052,13 +1536,8 @@ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) // owner bits 'random'. Set them to Rthread. -#ifdef AARCH64 - mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); - andr(tmp_reg, tmp_reg, tmp2); -#else mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); -#endif // AARCH64 orr(tmp_reg, tmp_reg, Rthread); // new mark @@ -2087,13 +1566,8 @@ eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) // owner bits 'random'. Clear them -#ifdef AARCH64 - mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); - andr(tmp_reg, tmp_reg, tmp2); -#else mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); -#endif // AARCH64 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); @@ -2149,29 +1623,6 @@ ////////////////////////////////////////////////////////////////////////////////// -#ifdef AARCH64 - -void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { - switch (size_in_bytes) { - case 8: ldr(dst, src); break; - case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; - case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; - case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; - default: ShouldNotReachHere(); - } -} - -void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { - switch (size_in_bytes) { - case 8: str(src, dst); break; - case 4: str_32(src, dst); break; - case 2: strh(src, dst); break; - case 1: strb(src, dst); break; - default: ShouldNotReachHere(); - } -} - -#else void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, AsmCondition cond) { @@ -2192,7 +1643,6 @@ default: ShouldNotReachHere(); } } -#endif // AARCH64 // Look up the method for a megamorphic invokeinterface call. // The target method is determined by . 
@@ -2225,24 +1675,12 @@ Label loop; bind(loop); ldr(Rtmp, Address(Rscan, entry_size, post_indexed)); -#ifdef AARCH64 - Label found; - cmp(Rtmp, Rintf); - b(found, eq); - cbnz(Rtmp, loop); -#else cmp(Rtmp, Rintf); // set ZF and CF if interface is found cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is b(loop, ne); -#endif // AARCH64 -#ifdef AARCH64 - b(L_no_such_interface); - bind(found); -#else // CF == 0 means we reached the end of itable without finding icklass b(L_no_such_interface, cc); -#endif // !AARCH64 if (method_result != noreg) { // Interface found at previous position of Rscan, now load the method @@ -2316,31 +1754,20 @@ } void MacroAssembler::floating_cmp(Register dst) { -#ifdef AARCH64 - NOT_TESTED(); - cset(dst, gt); // 1 if '>', else 0 - csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 -#else vmrs(dst, FPSCR); orr(dst, dst, 0x08000000); eor(dst, dst, AsmOperand(dst, lsl, 3)); mov(dst, AsmOperand(dst, asr, 30)); -#endif } void MacroAssembler::restore_default_fp_mode() { -#ifdef AARCH64 - msr(SysReg_FPCR, ZR); -#else #ifndef __SOFTFP__ // Round to Near mode, IEEE compatible, masked exceptions mov(Rtemp, 0); vmsr(FPSCR, Rtemp); #endif // !__SOFTFP__ -#endif // AARCH64 } -#ifndef AARCH64 // 24-bit word range == 26-bit byte range bool check26(int offset) { // this could be simplified, but it mimics encoding and decoding @@ -2350,7 +1777,6 @@ int decoded = encoded << 8 >> 6; return offset == decoded; } -#endif // !AARCH64 // Perform some slight adjustments so the default 32MB code cache // is fully reachable. @@ -2361,18 +1787,6 @@ return CodeCache::high_bound() - Assembler::InstructionSize; } -#ifdef AARCH64 -// Can we reach target using ADRP? -bool MacroAssembler::page_reachable_from_cache(address target) { - intptr_t cl = (intptr_t)first_cache_address() & ~0xfff; - intptr_t ch = (intptr_t)last_cache_address() & ~0xfff; - intptr_t addr = (intptr_t)target & ~0xfff; - - intptr_t loffset = addr - cl; - intptr_t hoffset = addr - ch; - return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0); -} -#endif // Can we reach target using unconditional branch or call from anywhere // in the code cache (because code can be relocated)? 
@@ -2397,11 +1811,7 @@ intptr_t loffset = (intptr_t)target - (intptr_t)cl; intptr_t hoffset = (intptr_t)target - (intptr_t)ch; -#ifdef AARCH64 - return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); -#else return check26(loffset - 8) && check26(hoffset - 8); -#endif } bool MacroAssembler::reachable_from_cache(address target) { @@ -2421,11 +1831,11 @@ return _cache_fully_reachable(); } -void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { +void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) { assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); if (reachable_from_cache(target)) { relocate(rtype); - b(target NOT_AARCH64_ARG(cond)); + b(target, cond); return; } @@ -2435,20 +1845,6 @@ rtype = relocInfo::none; } -#ifdef AARCH64 - assert (scratch != noreg, "should be specified"); - InlinedAddress address_literal(target, rtype); - ldr_literal(scratch, address_literal); - br(scratch); - int off = offset(); - bind_literal(address_literal); -#ifdef COMPILER2 - if (offset() - off == wordSize) { - // no padding, so insert nop for worst-case sizing - nop(); - } -#endif -#else if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { // Note: this version cannot be (atomically) patched mov_slow(scratch, (intptr_t)target, cond); @@ -2464,20 +1860,19 @@ bind_literal(address_literal); bind(skip); } -#endif // AARCH64 } // Similar to jump except that: // - near calls are valid only if any destination in the cache is near // - no movt/movw (not atomically patchable) -void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { +void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) { assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); if (cache_fully_reachable()) { // Note: this assumes that all possible targets (the initial one // and the addressed patched to) are all in the code cache. 
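The non-patchable fallback above relies on mov_slow() to materialize the full 32-bit target before branching through the scratch register; on cores that report supports_movw(), that typically boils down to a MOVW/MOVT pair (low half, then high half). A small sketch of that split, purely for illustration:

#include <cassert>
#include <cstdint>

// MOVW writes the low 16 bits of a register and zeroes the rest;
// MOVT then writes the high 16 bits.  Together they rebuild any
// 32-bit constant, such as a branch target address.
struct MovwMovt { uint16_t movw_imm; uint16_t movt_imm; };

static MovwMovt split_constant(uint32_t value) {
  return { (uint16_t)(value & 0xffffu), (uint16_t)(value >> 16) };
}

int main() {
  uint32_t target = 0xcafe1234u;
  MovwMovt halves = split_constant(target);
  assert(halves.movw_imm == 0x1234 && halves.movt_imm == 0xcafe);
  assert(((uint32_t)halves.movt_imm << 16 | halves.movw_imm) == target);
  return 0;
}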
assert(CodeCache::contains(target), "target might be too far"); relocate(rtype); - b(target NOT_AARCH64_ARG(cond)); + b(target, cond); return; } @@ -2487,21 +1882,6 @@ rtype = relocInfo::none; } -#ifdef AARCH64 - assert (scratch != noreg, "should be specified"); - InlinedAddress address_literal(target); - relocate(rtype); - ldr_literal(scratch, address_literal); - br(scratch); - int off = offset(); - bind_literal(address_literal); -#ifdef COMPILER2 - if (offset() - off == wordSize) { - // no padding, so insert nop for worst-case sizing - nop(); - } -#endif -#else { Label skip; InlinedAddress address_literal(target); @@ -2513,15 +1893,14 @@ bind_literal(address_literal); bind(skip); } -#endif // AARCH64 } -void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { +void MacroAssembler::call(address target, RelocationHolder rspec, AsmCondition cond) { Register scratch = LR; assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); if (reachable_from_cache(target)) { relocate(rspec); - bl(target NOT_AARCH64_ARG(cond)); + bl(target, cond); return; } @@ -2532,31 +1911,20 @@ rspec = RelocationHolder::none; } -#ifndef AARCH64 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { // Note: this version cannot be (atomically) patched mov_slow(scratch, (intptr_t)target, cond); blx(scratch, cond); return; } -#endif { Label ret_addr; -#ifndef AARCH64 if (cond != al) { b(ret_addr, inverse(cond)); } -#endif -#ifdef AARCH64 - // TODO-AARCH64: make more optimal implementation - // [ Keep in sync with MacroAssembler::call_size ] - assert(rspec.type() == relocInfo::none, "call reloc not implemented"); - mov_slow(scratch, target); - blr(scratch); -#else InlinedAddress address_literal(target); relocate(rspec); adr(LR, ret_addr); @@ -2564,18 +1932,9 @@ bind_literal(address_literal); bind(ret_addr); -#endif } } -#if defined(AARCH64) && defined(COMPILER2) -int MacroAssembler::call_size(address target, bool far, bool patchable) { - // FIXME: mov_slow is variable-length - if (!far) return 1; // bl - if (patchable) return 2; // ldr; blr - return instr_count_for_mov_slow((intptr_t)target) + 1; -} -#endif int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { assert(rspec.type() == relocInfo::static_call_type || @@ -2590,38 +1949,10 @@ assert(CodeCache::contains(target), "target might be too far"); bl(target); } else { -#if defined(AARCH64) && defined(COMPILER2) - if (c2) { - // return address needs to match call_size(). 
- // no need to trash Rtemp - int off = offset(); - Label skip_literal; - InlinedAddress address_literal(target); - ldr_literal(LR, address_literal); - blr(LR); - int ret_addr_offset = offset(); - assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); - b(skip_literal); - int off2 = offset(); - bind_literal(address_literal); - if (offset() - off2 == wordSize) { - // no padding, so insert nop for worst-case sizing - nop(); - } - bind(skip_literal); - return ret_addr_offset; - } -#endif Label ret_addr; InlinedAddress address_literal(target); -#ifdef AARCH64 - ldr_literal(Rtemp, address_literal); - adr(LR, ret_addr); - br(Rtemp); -#else adr(LR, ret_addr); ldr_literal(PC, address_literal); -#endif bind_literal(address_literal); bind(ret_addr); } @@ -2648,47 +1979,17 @@ // Compressed pointers -#ifdef AARCH64 - -void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { - if (UseCompressedClassPointers) { - ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); - decode_klass_not_null(dst_klass); - } else { - ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); - } -} - -#else void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); } -#endif // AARCH64 // Blows src_klass. void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { -#ifdef AARCH64 - if (UseCompressedClassPointers) { - assert(src_klass != dst_oop, "not enough registers"); - encode_klass_not_null(src_klass); - str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); - return; - } -#endif // AARCH64 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); } -#ifdef AARCH64 - -void MacroAssembler::store_klass_gap(Register dst) { - if (UseCompressedClassPointers) { - str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); - } -} - -#endif // AARCH64 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { @@ -2729,265 +2030,9 @@ } -#ifdef AARCH64 - -// Algorithm must match oop.inline.hpp encode_heap_oop. -void MacroAssembler::encode_heap_oop(Register dst, Register src) { - // This code pattern is matched in NativeIntruction::skip_encode_heap_oop. - // Update it at modifications. - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); -#endif - verify_oop(src); - if (Universe::narrow_oop_base() == NULL) { - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - _lsr(dst, src, Universe::narrow_oop_shift()); - } else if (dst != src) { - mov(dst, src); - } - } else { - tst(src, src); - csel(dst, Rheap_base, src, eq); - sub(dst, dst, Rheap_base); - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - _lsr(dst, dst, Universe::narrow_oop_shift()); - } - } -} - -// Same algorithm as oop.inline.hpp decode_heap_oop. 
-void MacroAssembler::decode_heap_oop(Register dst, Register src) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); -#endif - assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - if (Universe::narrow_oop_base() != NULL) { - tst(src, src); - add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); - csel(dst, dst, ZR, ne); - } else { - _lsl(dst, src, Universe::narrow_oop_shift()); - } - verify_oop(dst); -} - -#ifdef COMPILER2 -// Algorithm must match oop.inline.hpp encode_heap_oop. -// Must preserve condition codes, or C2 encodeHeapOop_not_null rule -// must be changed. -void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); -#endif - verify_oop(src); - if (Universe::narrow_oop_base() == NULL) { - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - _lsr(dst, src, Universe::narrow_oop_shift()); - } else if (dst != src) { - mov(dst, src); - } - } else { - sub(dst, src, Rheap_base); - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - _lsr(dst, dst, Universe::narrow_oop_shift()); - } - } -} - -// Same algorithm as oops.inline.hpp decode_heap_oop. -// Must preserve condition codes, or C2 decodeHeapOop_not_null rule -// must be changed. -void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); -#endif - assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - if (Universe::narrow_oop_base() != NULL) { - add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); - } else { - _lsl(dst, src, Universe::narrow_oop_shift()); - } - verify_oop(dst); -} - -void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - assert(UseCompressedClassPointers, "should only be used for compressed header"); - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - - // Relocation with special format (see relocInfo_arm.hpp). - relocate(rspec); - narrowKlass encoded_k = Klass::encode_klass(k); - movz(dst, encoded_k & 0xffff, 0); - movk(dst, (encoded_k >> 16) & 0xffff, 16); -} - -void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { - assert(UseCompressedOops, "should only be used for compressed header"); - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - - relocate(rspec); - movz(dst, 0xffff, 0); - movk(dst, 0xffff, 16); -} - -#endif // COMPILER2 -// Must preserve condition codes, or C2 encodeKlass_not_null rule -// must be changed. -void MacroAssembler::encode_klass_not_null(Register r) { - if (Universe::narrow_klass_base() != NULL) { - // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 
- assert(r != Rheap_base, "Encoding a klass in Rheap_base"); - mov_slow(Rheap_base, Universe::narrow_klass_base()); - sub(r, r, Rheap_base); - } - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - _lsr(r, r, Universe::narrow_klass_shift()); - } - if (Universe::narrow_klass_base() != NULL) { - reinit_heapbase(); - } -} - -// Must preserve condition codes, or C2 encodeKlass_not_null rule -// must be changed. -void MacroAssembler::encode_klass_not_null(Register dst, Register src) { - if (dst == src) { - encode_klass_not_null(src); - return; - } - if (Universe::narrow_klass_base() != NULL) { - mov_slow(dst, (int64_t)Universe::narrow_klass_base()); - sub(dst, src, dst); - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - _lsr(dst, dst, Universe::narrow_klass_shift()); - } - } else { - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - _lsr(dst, src, Universe::narrow_klass_shift()); - } else { - mov(dst, src); - } - } -} - -// Function instr_count_for_decode_klass_not_null() counts the instructions -// generated by decode_klass_not_null(register r) and reinit_heapbase(), -// when (Universe::heap() != NULL). Hence, if the instructions they -// generate change, then this method needs to be updated. -int MacroAssembler::instr_count_for_decode_klass_not_null() { - assert(UseCompressedClassPointers, "only for compressed klass ptrs"); - assert(Universe::heap() != NULL, "java heap should be initialized"); - if (Universe::narrow_klass_base() != NULL) { - return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow - 1 + // add - instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow - } else { - if (Universe::narrow_klass_shift() != 0) { - return 1; - } - } - return 0; -} - -// Must preserve condition codes, or C2 decodeKlass_not_null rule -// must be changed. -void MacroAssembler::decode_klass_not_null(Register r) { - int off = offset(); - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - assert(Universe::heap() != NULL, "java heap should be initialized"); - assert(r != Rheap_base, "Decoding a klass in Rheap_base"); - // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_klass_base() != NULL) { - // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. - mov_slow(Rheap_base, Universe::narrow_klass_base()); - add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); - reinit_heapbase(); - } else { - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - _lsl(r, r, Universe::narrow_klass_shift()); - } - } - assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); -} - -// Must preserve condition codes, or C2 decodeKlass_not_null rule -// must be changed. 
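The removed encode/decode routines above all apply the same arithmetic: subtract the narrow base (when one is configured), shift right by the alignment shift to compress, and invert those steps to decompress, with NULL mapping to 0 in both directions. A standalone sketch of that arithmetic with hypothetical base and shift values (illustration only; the real values come from Universe::narrow_oop_base()/shift()):

#include <cassert>
#include <cstdint>

// Hypothetical parameters standing in for the narrow base and
// LogMinObjAlignmentInBytes; chosen only for this example.
static const uint64_t kBase  = 0x0000000800000000ull;
static const unsigned kShift = 3;

static uint32_t encode(uint64_t ptr) {            // NULL stays NULL
  return ptr == 0 ? 0u : (uint32_t)((ptr - kBase) >> kShift);
}

static uint64_t decode(uint32_t narrow) {         // 0 stays NULL
  return narrow == 0 ? 0ull : kBase + ((uint64_t)narrow << kShift);
}

int main() {
  uint64_t ptr = kBase + 0x12345678;              // 8-byte-aligned offset
  assert(decode(encode(ptr)) == ptr);
  assert(encode(0) == 0 && decode(0) == 0);
  return 0;
}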
-void MacroAssembler::decode_klass_not_null(Register dst, Register src) { - if (src == dst) { - decode_klass_not_null(src); - return; - } - - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - assert(Universe::heap() != NULL, "java heap should be initialized"); - assert(src != Rheap_base, "Decoding a klass in Rheap_base"); - assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_klass_base() != NULL) { - mov_slow(dst, Universe::narrow_klass_base()); - add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); - } else { - _lsl(dst, src, Universe::narrow_klass_shift()); - } -} - - -void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops || UseCompressedClassPointers) { - if (Universe::heap() != NULL) { - mov_slow(Rheap_base, Universe::narrow_ptrs_base()); - } else { - ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); - } - } -} - -#ifdef ASSERT -void MacroAssembler::verify_heapbase(const char* msg) { - // This code pattern is matched in NativeIntruction::skip_verify_heapbase. - // Update it at modifications. - assert (UseCompressedOops, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - if (CheckCompressedOops) { - Label ok; - str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); - raw_push(Rtemp, ZR); - mrs(Rtemp, Assembler::SysReg_NZCV); - str(Rtemp, Address(SP, 1 * wordSize)); - mov_slow(Rtemp, Universe::narrow_ptrs_base()); - cmp(Rheap_base, Rtemp); - b(ok, eq); - stop(msg); - bind(ok); - ldr(Rtemp, Address(SP, 1 * wordSize)); - msr(Assembler::SysReg_NZCV, Rtemp); - raw_pop(Rtemp, ZR); - str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); - } -} -#endif // ASSERT - -#endif // AARCH64 #ifdef COMPILER2 -void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) +void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { assert(VM_Version::supports_ldrex(), "unsupported, yet?"); @@ -3002,11 +2047,7 @@ if (UseBiasedLocking && !UseOptoBiasInlining) { Label failed; -#ifdef AARCH64 - biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); -#else biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); -#endif bind(failed); } @@ -3017,17 +2058,6 @@ // Check for recursive lock // See comments in InterpreterMacroAssembler::lock_object for // explanations on the fast recursive locking check. 
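In short, the fast recursive case below (in both the removed AArch64 block and the surviving 32-bit code) treats the lock as recursive when the mark word points into the locking thread's own stack, less than one page above SP, with the two low lock bits clear; masking the difference with (3 - page_size) tests both conditions at once. A standalone sketch of that test:

#include <cassert>
#include <cstdint>

// Recursive stack-lock test: mark must lie in [sp, sp + page_size) and be
// at least 4-byte aligned.  (3 - page_size) == ~(page_size - 1) | 3 checks
// both with a single AND, as discussed in JDK-8153107.
static bool is_recursive_stack_lock(uintptr_t mark, uintptr_t sp,
                                    uintptr_t page_size) {
  uintptr_t mask = 3 - page_size;
  return ((mark - sp) & mask) == 0;
}

int main() {
  const uintptr_t page = 4096;
  const uintptr_t sp = 0x7ffe0000;
  assert(is_recursive_stack_lock(sp + 0x40, sp, page));      // our own frame
  assert(!is_recursive_stack_lock(sp + 0x41, sp, page));     // lock bits set
  assert(!is_recursive_stack_lock(sp + 2 * page, sp, page)); // too far away
  assert(!is_recursive_stack_lock(sp - 0x40, sp, page));     // below SP
  return 0;
}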
-#ifdef AARCH64 - intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); - Assembler::LogicalImmediate imm(mask, false); - mov(Rscratch, SP); - sub(Rscratch, Rmark, Rscratch); - ands(Rscratch, Rscratch, imm); - // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) - str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); - b(done); - -#else // -1- test low 2 bits movs(Rscratch, AsmOperand(Rmark, lsl, 30)); // -2- test (hdr - SP) if the low two bits are 0 @@ -3037,7 +2067,6 @@ // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); b(done); -#endif bind(fast_lock); str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); @@ -3050,7 +2079,7 @@ } -void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) +void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { assert(VM_Version::supports_ldrex(), "unsupported, yet?"); --- old/src/hotspot/cpu/arm/macroAssembler_arm.hpp 2018-09-17 10:30:22.049899525 -0400 +++ new/src/hotspot/cpu/arm/macroAssembler_arm.hpp 2018-09-17 10:30:21.402861733 -0400 @@ -229,10 +229,6 @@ // this was subsequently modified to its present name and return type virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); -#ifdef AARCH64 -# define NOT_IMPLEMENTED() unimplemented("NYI at " __FILE__ ":" XSTR(__LINE__)) -# define NOT_TESTED() warn("Not tested at " __FILE__ ":" XSTR(__LINE__)) -#endif void align(int modulus); @@ -275,7 +271,7 @@ // Always sets/resets sp, which default to SP if (last_sp == noreg) // Optionally sets/resets fp (use noreg to avoid setting it) - // Always sets/resets pc on AArch64; optionally sets/resets pc on 32-bit ARM depending on save_last_java_pc flag + // Optionally sets/resets pc depending on save_last_java_pc flag // Note: when saving PC, set_last_Java_frame returns PC's offset in the code section // (for oop_maps offset computation) int set_last_Java_frame(Register last_sp, Register last_fp, bool save_last_java_pc, Register tmp); @@ -401,7 +397,6 @@ void resolve_jobject(Register value, Register tmp1, Register tmp2); -#ifndef AARCH64 void nop() { mov(R0, R0); } @@ -441,7 +436,6 @@ void fpops(FloatRegister fd, AsmCondition cond = al) { fldmias(SP, FloatRegisterSet(fd), writeback, cond); } -#endif // !AARCH64 // Order access primitives enum Membar_mask_bits { @@ -451,15 +445,10 @@ LoadLoad = 1 << 0 }; -#ifdef AARCH64 - // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM - void membar(Membar_mask_bits order_constraint, Register tmp = noreg); -#else void membar(Membar_mask_bits mask, Register tmp, bool preserve_flags = true, Register load_tgt = noreg); -#endif void breakpoint(AsmCondition cond = al); void stop(const char* msg); @@ -491,47 +480,28 @@ void add_slow(Register rd, Register rn, int c); void sub_slow(Register rd, Register rn, int c); -#ifdef AARCH64 - static int mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm /* optional */); -#endif - void mov_slow(Register rd, intptr_t c NOT_AARCH64_ARG(AsmCondition cond = al)); + void mov_slow(Register rd, intptr_t c, AsmCondition cond = al); void mov_slow(Register rd, const char *string); void mov_slow(Register rd, address addr); void patchable_mov_oop(Register 
rd, jobject o, int oop_index) { - mov_oop(rd, o, oop_index AARCH64_ONLY_ARG(true)); + mov_oop(rd, o, oop_index); } - void mov_oop(Register rd, jobject o, int index = 0 - AARCH64_ONLY_ARG(bool patchable = false) - NOT_AARCH64_ARG(AsmCondition cond = al)); - + void mov_oop(Register rd, jobject o, int index = 0, AsmCondition cond = al); void patchable_mov_metadata(Register rd, Metadata* o, int index) { - mov_metadata(rd, o, index AARCH64_ONLY_ARG(true)); + mov_metadata(rd, o, index); } - void mov_metadata(Register rd, Metadata* o, int index = 0 AARCH64_ONLY_ARG(bool patchable = false)); + void mov_metadata(Register rd, Metadata* o, int index = 0); - void mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond = al)); - void mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond = al)); + void mov_float(FloatRegister fd, jfloat c, AsmCondition cond = al); + void mov_double(FloatRegister fd, jdouble c, AsmCondition cond = al); -#ifdef AARCH64 - int mov_pc_to(Register rd) { - Label L; - adr(rd, L); - bind(L); - return offset(); - } -#endif // Note: this variant of mov_address assumes the address moves with // the code. Do *not* implement it with non-relocated instructions, // unless PC-relative. -#ifdef AARCH64 - void mov_relative_address(Register rd, address addr) { - adr(rd, addr); - } -#else void mov_relative_address(Register rd, address addr, AsmCondition cond = al) { int offset = addr - pc() - 8; assert((offset & 3) == 0, "bad alignment"); @@ -543,7 +513,6 @@ sub(rd, PC, -offset, cond); } } -#endif // AARCH64 // Runtime address that may vary from one execution to another. The // symbolic_reference describes what the address is, allowing @@ -564,7 +533,6 @@ mov_slow(rd, (intptr_t)addr); return; } -#ifndef AARCH64 if (VM_Version::supports_movw()) { relocate(rspec); int c = (int)addr; @@ -574,15 +542,11 @@ } return; } -#endif Label skip_literal; InlinedAddress addr_literal(addr, rspec); ldr_literal(rd, addr_literal); b(skip_literal); bind_literal(addr_literal); - // AARCH64 WARNING: because of alignment padding, extra padding - // may be required to get a consistent size for C2, or rules must - // overestimate size see MachEpilogNode::size bind(skip_literal); } @@ -596,45 +560,28 @@ assert(L.rspec().type() != relocInfo::runtime_call_type, "avoid ldr_literal for calls"); assert(L.rspec().type() != relocInfo::static_call_type, "avoid ldr_literal for calls"); relocate(L.rspec()); -#ifdef AARCH64 - ldr(rd, target(L.label)); -#else ldr(rd, Address(PC, target(L.label) - pc() - 8)); -#endif } void ldr_literal(Register rd, InlinedString& L) { const char* msg = L.msg(); if (code()->consts()->contains((address)msg)) { // string address moves with the code -#ifdef AARCH64 - ldr(rd, (address)msg); -#else ldr(rd, Address(PC, ((address)msg) - pc() - 8)); -#endif return; } // Warning: use external strings with care. They are not relocated // if the code moves. If needed, use code_string to move them // to the consts section. -#ifdef AARCH64 - ldr(rd, target(L.label)); -#else ldr(rd, Address(PC, target(L.label) - pc() - 8)); -#endif } void ldr_literal(Register rd, InlinedMetadata& L) { // relocation done in the bind_literal for metadatas -#ifdef AARCH64 - ldr(rd, target(L.label)); -#else ldr(rd, Address(PC, target(L.label) - pc() - 8)); -#endif } void bind_literal(InlinedAddress& L) { - AARCH64_ONLY(align(wordSize)); bind(L.label); assert(L.rspec().type() != relocInfo::metadata_type, "Must use InlinedMetadata"); // We currently do not use oop 'bound' literals. 
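Several of the 32-bit helpers kept above (mov_relative_address, the ldr_literal variants, and adr further down) share one convention: reading PC on 32-bit ARM yields the address of the current instruction plus 8, so every PC-relative offset is computed as target - pc() - 8 and then applied with an ADD, SUB or LDR relative to PC. A tiny standalone sketch of that computation:

#include <cassert>
#include <cstdint>

// Offset to encode in an ADD rd, PC, #imm (positive) or SUB rd, PC, #imm
// (negative), given where the instruction sits and what it should address.
static int64_t pc_relative_offset(uint64_t insn_address, uint64_t target) {
  return (int64_t)target - (int64_t)insn_address - 8;   // PC reads insn + 8
}

int main() {
  const uint64_t insn = 0x10000;
  assert(pc_relative_offset(insn, 0x10008) == 0);     // target == PC as read
  assert(pc_relative_offset(insn, 0x10100) == 0xf8);  // forward: ADD
  assert(pc_relative_offset(insn, 0x0ff00) == -264);  // backward: SUB
  return 0;
}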
@@ -652,13 +599,11 @@ // to detect errors. return; } - AARCH64_ONLY(align(wordSize)); bind(L.label); AbstractAssembler::emit_address((address)L.msg()); } void bind_literal(InlinedMetadata& L) { - AARCH64_ONLY(align(wordSize)); bind(L.label); relocate(metadata_Relocation::spec_for_immediate()); AbstractAssembler::emit_address((address)L.data()); @@ -667,138 +612,106 @@ void resolve_oop_handle(Register result); void load_mirror(Register mirror, Register method, Register tmp); - // Porting layer between 32-bit ARM and AArch64 - -#define COMMON_INSTR_1(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg_type) \ +#define ARM_INSTR_1(common_mnemonic, arm32_mnemonic, arg_type) \ void common_mnemonic(arg_type arg) { \ - AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg); \ + arm32_mnemonic(arg); \ } -#define COMMON_INSTR_2(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \ +#define ARM_INSTR_2(common_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \ void common_mnemonic(arg1_type arg1, arg2_type arg2) { \ - AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2); \ + arm32_mnemonic(arg1, arg2); \ } -#define COMMON_INSTR_3(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \ +#define ARM_INSTR_3(common_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \ void common_mnemonic(arg1_type arg1, arg2_type arg2, arg3_type arg3) { \ - AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2, arg3); \ + arm32_mnemonic(arg1, arg2, arg3); \ } - COMMON_INSTR_1(jump, br, bx, Register) - COMMON_INSTR_1(call, blr, blx, Register) + ARM_INSTR_1(jump, bx, Register) + ARM_INSTR_1(call, blx, Register) - COMMON_INSTR_2(cbz_32, cbz_w, cbz, Register, Label&) - COMMON_INSTR_2(cbnz_32, cbnz_w, cbnz, Register, Label&) + ARM_INSTR_2(cbz_32, cbz, Register, Label&) + ARM_INSTR_2(cbnz_32, cbnz, Register, Label&) - COMMON_INSTR_2(ldr_u32, ldr_w, ldr, Register, Address) - COMMON_INSTR_2(ldr_s32, ldrsw, ldr, Register, Address) - COMMON_INSTR_2(str_32, str_w, str, Register, Address) - - COMMON_INSTR_2(mvn_32, mvn_w, mvn, Register, Register) - COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, Register) - COMMON_INSTR_2(neg_32, neg_w, neg, Register, Register) - COMMON_INSTR_2(clz_32, clz_w, clz, Register, Register) - COMMON_INSTR_2(rbit_32, rbit_w, rbit, Register, Register) - - COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, int) - COMMON_INSTR_2(cmn_32, cmn_w, cmn, Register, int) - - COMMON_INSTR_3(add_32, add_w, add, Register, Register, Register) - COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, Register) - COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, Register) - COMMON_INSTR_3(mul_32, mul_w, mul, Register, Register, Register) - COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, Register) - COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, Register) - COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, Register) - - COMMON_INSTR_3(add_32, add_w, add, Register, Register, AsmOperand) - COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, AsmOperand) - COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, AsmOperand) - COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, AsmOperand) - COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, AsmOperand) - - - COMMON_INSTR_3(add_32, add_w, add, Register, Register, int) - COMMON_INSTR_3(adds_32, adds_w, adds, Register, Register, int) - COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, int) - COMMON_INSTR_3(subs_32, subs_w, subs, Register, 
Register, int) - - COMMON_INSTR_2(tst_32, tst_w, tst, Register, unsigned int) - COMMON_INSTR_2(tst_32, tst_w, tst, Register, AsmOperand) - - COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, uint) - COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, uint) - COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, uint) - - COMMON_INSTR_1(cmp_zero_float, fcmp0_s, fcmpzs, FloatRegister) - COMMON_INSTR_1(cmp_zero_double, fcmp0_d, fcmpzd, FloatRegister) - - COMMON_INSTR_2(ldr_float, ldr_s, flds, FloatRegister, Address) - COMMON_INSTR_2(str_float, str_s, fsts, FloatRegister, Address) - COMMON_INSTR_2(mov_float, fmov_s, fcpys, FloatRegister, FloatRegister) - COMMON_INSTR_2(neg_float, fneg_s, fnegs, FloatRegister, FloatRegister) - COMMON_INSTR_2(abs_float, fabs_s, fabss, FloatRegister, FloatRegister) - COMMON_INSTR_2(sqrt_float, fsqrt_s, fsqrts, FloatRegister, FloatRegister) - COMMON_INSTR_2(cmp_float, fcmp_s, fcmps, FloatRegister, FloatRegister) - - COMMON_INSTR_3(add_float, fadd_s, fadds, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(sub_float, fsub_s, fsubs, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(mul_float, fmul_s, fmuls, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(div_float, fdiv_s, fdivs, FloatRegister, FloatRegister, FloatRegister) - - COMMON_INSTR_2(ldr_double, ldr_d, fldd, FloatRegister, Address) - COMMON_INSTR_2(str_double, str_d, fstd, FloatRegister, Address) - COMMON_INSTR_2(mov_double, fmov_d, fcpyd, FloatRegister, FloatRegister) - COMMON_INSTR_2(neg_double, fneg_d, fnegd, FloatRegister, FloatRegister) - COMMON_INSTR_2(cmp_double, fcmp_d, fcmpd, FloatRegister, FloatRegister) - COMMON_INSTR_2(abs_double, fabs_d, fabsd, FloatRegister, FloatRegister) - COMMON_INSTR_2(sqrt_double, fsqrt_d, fsqrtd, FloatRegister, FloatRegister) - - COMMON_INSTR_3(add_double, fadd_d, faddd, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(sub_double, fsub_d, fsubd, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(mul_double, fmul_d, fmuld, FloatRegister, FloatRegister, FloatRegister) - COMMON_INSTR_3(div_double, fdiv_d, fdivd, FloatRegister, FloatRegister, FloatRegister) - - COMMON_INSTR_2(convert_f2d, fcvt_ds, fcvtds, FloatRegister, FloatRegister) - COMMON_INSTR_2(convert_d2f, fcvt_sd, fcvtsd, FloatRegister, FloatRegister) - - COMMON_INSTR_2(mov_fpr2gpr_float, fmov_ws, fmrs, Register, FloatRegister) - -#undef COMMON_INSTR_1 -#undef COMMON_INSTR_2 -#undef COMMON_INSTR_3 - - -#ifdef AARCH64 - - void mov(Register dst, Register src, AsmCondition cond) { - if (cond == al) { - mov(dst, src); - } else { - csel(dst, src, dst, cond); - } - } - - // Propagate other overloaded "mov" methods from Assembler. 
- void mov(Register dst, Register src) { Assembler::mov(dst, src); } - void mov(Register rd, int imm) { Assembler::mov(rd, imm); } - - void mov(Register dst, int imm, AsmCondition cond) { - assert(imm == 0 || imm == 1, ""); - if (imm == 0) { - mov(dst, ZR, cond); - } else if (imm == 1) { - csinc(dst, dst, ZR, inverse(cond)); - } else if (imm == -1) { - csinv(dst, dst, ZR, inverse(cond)); - } else { - fatal("illegal mov(R%d,%d,cond)", dst->encoding(), imm); - } - } + ARM_INSTR_2(ldr_u32, ldr, Register, Address) + ARM_INSTR_2(ldr_s32, ldr, Register, Address) + ARM_INSTR_2(str_32, str, Register, Address) + + ARM_INSTR_2(mvn_32, mvn, Register, Register) + ARM_INSTR_2(cmp_32, cmp, Register, Register) + ARM_INSTR_2(neg_32, neg, Register, Register) + ARM_INSTR_2(clz_32, clz, Register, Register) + ARM_INSTR_2(rbit_32, rbit, Register, Register) + + ARM_INSTR_2(cmp_32, cmp, Register, int) + ARM_INSTR_2(cmn_32, cmn, Register, int) + + ARM_INSTR_3(add_32, add, Register, Register, Register) + ARM_INSTR_3(sub_32, sub, Register, Register, Register) + ARM_INSTR_3(subs_32, subs, Register, Register, Register) + ARM_INSTR_3(mul_32, mul, Register, Register, Register) + ARM_INSTR_3(and_32, andr, Register, Register, Register) + ARM_INSTR_3(orr_32, orr, Register, Register, Register) + ARM_INSTR_3(eor_32, eor, Register, Register, Register) + + ARM_INSTR_3(add_32, add, Register, Register, AsmOperand) + ARM_INSTR_3(sub_32, sub, Register, Register, AsmOperand) + ARM_INSTR_3(orr_32, orr, Register, Register, AsmOperand) + ARM_INSTR_3(eor_32, eor, Register, Register, AsmOperand) + ARM_INSTR_3(and_32, andr, Register, Register, AsmOperand) + + + ARM_INSTR_3(add_32, add, Register, Register, int) + ARM_INSTR_3(adds_32, adds, Register, Register, int) + ARM_INSTR_3(sub_32, sub, Register, Register, int) + ARM_INSTR_3(subs_32, subs, Register, Register, int) + + ARM_INSTR_2(tst_32, tst, Register, unsigned int) + ARM_INSTR_2(tst_32, tst, Register, AsmOperand) + + ARM_INSTR_3(and_32, andr, Register, Register, uint) + ARM_INSTR_3(orr_32, orr, Register, Register, uint) + ARM_INSTR_3(eor_32, eor, Register, Register, uint) + + ARM_INSTR_1(cmp_zero_float, fcmpzs, FloatRegister) + ARM_INSTR_1(cmp_zero_double, fcmpzd, FloatRegister) + + ARM_INSTR_2(ldr_float, flds, FloatRegister, Address) + ARM_INSTR_2(str_float, fsts, FloatRegister, Address) + ARM_INSTR_2(mov_float, fcpys, FloatRegister, FloatRegister) + ARM_INSTR_2(neg_float, fnegs, FloatRegister, FloatRegister) + ARM_INSTR_2(abs_float, fabss, FloatRegister, FloatRegister) + ARM_INSTR_2(sqrt_float, fsqrts, FloatRegister, FloatRegister) + ARM_INSTR_2(cmp_float, fcmps, FloatRegister, FloatRegister) + + ARM_INSTR_3(add_float, fadds, FloatRegister, FloatRegister, FloatRegister) + ARM_INSTR_3(sub_float, fsubs, FloatRegister, FloatRegister, FloatRegister) + ARM_INSTR_3(mul_float, fmuls, FloatRegister, FloatRegister, FloatRegister) + ARM_INSTR_3(div_float, fdivs, FloatRegister, FloatRegister, FloatRegister) + + ARM_INSTR_2(ldr_double, fldd, FloatRegister, Address) + ARM_INSTR_2(str_double, fstd, FloatRegister, Address) + ARM_INSTR_2(mov_double, fcpyd, FloatRegister, FloatRegister) + ARM_INSTR_2(neg_double, fnegd, FloatRegister, FloatRegister) + ARM_INSTR_2(cmp_double, fcmpd, FloatRegister, FloatRegister) + ARM_INSTR_2(abs_double, fabsd, FloatRegister, FloatRegister) + ARM_INSTR_2(sqrt_double, fsqrtd, FloatRegister, FloatRegister) + + ARM_INSTR_3(add_double, faddd, FloatRegister, FloatRegister, FloatRegister) + ARM_INSTR_3(sub_double, fsubd, FloatRegister, FloatRegister, FloatRegister) + 
ARM_INSTR_3(mul_double, fmuld, FloatRegister, FloatRegister, FloatRegister) + ARM_INSTR_3(div_double, fdivd, FloatRegister, FloatRegister, FloatRegister) + + ARM_INSTR_2(convert_f2d, fcvtds, FloatRegister, FloatRegister) + ARM_INSTR_2(convert_d2f, fcvtsd, FloatRegister, FloatRegister) + + ARM_INSTR_2(mov_fpr2gpr_float, fmrs, Register, FloatRegister) + +#undef ARM_INSTR_1 +#undef ARM_INSTR_2 +#undef ARM_INSTR_3 - void movs(Register dst, Register src) { adds(dst, src, 0); } -#else // AARCH64 void tbz(Register rt, int bit, Label& L) { assert(0 <= bit && bit < BitsPerWord, "bit number is out of range"); @@ -831,166 +744,89 @@ bx(dst); } -#endif // AARCH64 Register zero_register(Register tmp) { -#ifdef AARCH64 - return ZR; -#else mov(tmp, 0); return tmp; -#endif } void logical_shift_left(Register dst, Register src, int shift) { -#ifdef AARCH64 - _lsl(dst, src, shift); -#else mov(dst, AsmOperand(src, lsl, shift)); -#endif } void logical_shift_left_32(Register dst, Register src, int shift) { -#ifdef AARCH64 - _lsl_w(dst, src, shift); -#else mov(dst, AsmOperand(src, lsl, shift)); -#endif } void logical_shift_right(Register dst, Register src, int shift) { -#ifdef AARCH64 - _lsr(dst, src, shift); -#else mov(dst, AsmOperand(src, lsr, shift)); -#endif } void arith_shift_right(Register dst, Register src, int shift) { -#ifdef AARCH64 - _asr(dst, src, shift); -#else mov(dst, AsmOperand(src, asr, shift)); -#endif } void asr_32(Register dst, Register src, int shift) { -#ifdef AARCH64 - _asr_w(dst, src, shift); -#else mov(dst, AsmOperand(src, asr, shift)); -#endif } // If holds, compares r1 and r2. Otherwise, flags are set so that does not hold. void cond_cmp(Register r1, Register r2, AsmCondition cond) { -#ifdef AARCH64 - ccmp(r1, r2, flags_for_condition(inverse(cond)), cond); -#else cmp(r1, r2, cond); -#endif } // If holds, compares r and imm. Otherwise, flags are set so that does not hold. void cond_cmp(Register r, int imm, AsmCondition cond) { -#ifdef AARCH64 - ccmp(r, imm, flags_for_condition(inverse(cond)), cond); -#else cmp(r, imm, cond); -#endif } void align_reg(Register dst, Register src, int align) { assert (is_power_of_2(align), "should be"); -#ifdef AARCH64 - andr(dst, src, ~(uintx)(align-1)); -#else bic(dst, src, align-1); -#endif } void prefetch_read(Address addr) { -#ifdef AARCH64 - prfm(pldl1keep, addr); -#else pld(addr); -#endif } void raw_push(Register r1, Register r2) { -#ifdef AARCH64 - stp(r1, r2, Address(SP, -2*wordSize, pre_indexed)); -#else assert(r1->encoding() < r2->encoding(), "should be ordered"); push(RegisterSet(r1) | RegisterSet(r2)); -#endif } void raw_pop(Register r1, Register r2) { -#ifdef AARCH64 - ldp(r1, r2, Address(SP, 2*wordSize, post_indexed)); -#else assert(r1->encoding() < r2->encoding(), "should be ordered"); pop(RegisterSet(r1) | RegisterSet(r2)); -#endif } void raw_push(Register r1, Register r2, Register r3) { -#ifdef AARCH64 - raw_push(r1, r2); - raw_push(r3, ZR); -#else assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered"); push(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3)); -#endif } void raw_pop(Register r1, Register r2, Register r3) { -#ifdef AARCH64 - raw_pop(r3, ZR); - raw_pop(r1, r2); -#else assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered"); pop(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3)); -#endif } // Restores registers r1 and r2 previously saved by raw_push(r1, r2, ret_addr) and returns by ret_addr. Clobbers LR. 
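The ordering asserts in raw_push()/raw_pop() above exist because a single LDM/STM transfers the registers in its set strictly in ascending register-number order, no matter how the RegisterSet was OR-ed together, so the first argument must be the lower-numbered register if it is meant to land at the lower address. A small illustration of that ordering rule:

#include <cassert>
#include <cstdint>
#include <vector>

// Order in which an STM/LDM moves the registers of a set: lowest register
// number first (lowest address first), independent of how the set was built.
static std::vector<int> transfer_order(uint32_t register_mask) {
  std::vector<int> order;
  for (int r = 0; r < 16; ++r) {
    if (register_mask & (1u << r)) {
      order.push_back(r);
    }
  }
  return order;
}

int main() {
  const std::vector<int> expected{3, 5};
  // RegisterSet(R5) | RegisterSet(R3) still transfers R3 first.
  assert(transfer_order((1u << 5) | (1u << 3)) == expected);
  return 0;
}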
void raw_pop_and_ret(Register r1, Register r2) { -#ifdef AARCH64 - raw_pop(r1, r2, LR); - ret(); -#else raw_pop(r1, r2, PC); -#endif } void indirect_jump(Address addr, Register scratch) { -#ifdef AARCH64 - ldr(scratch, addr); - br(scratch); -#else ldr(PC, addr); -#endif } void indirect_jump(InlinedAddress& literal, Register scratch) { -#ifdef AARCH64 - ldr_literal(scratch, literal); - br(scratch); -#else ldr_literal(PC, literal); -#endif } -#ifndef AARCH64 void neg(Register dst, Register src) { rsb(dst, src, 0); } -#endif void branch_if_negative_32(Register r, Label& L) { - // Note about branch_if_negative_32() / branch_if_any_negative_32() implementation for AArch64: // tbnz is not used instead of tst & b.mi because destination may be out of tbnz range (+-32KB) // since these methods are used in LIR_Assembler::emit_arraycopy() to jump to stub entry. tst_32(r, r); @@ -998,56 +834,31 @@ } void branch_if_any_negative_32(Register r1, Register r2, Register tmp, Label& L) { -#ifdef AARCH64 - orr_32(tmp, r1, r2); - tst_32(tmp, tmp); -#else orrs(tmp, r1, r2); -#endif b(L, mi); } void branch_if_any_negative_32(Register r1, Register r2, Register r3, Register tmp, Label& L) { orr_32(tmp, r1, r2); -#ifdef AARCH64 - orr_32(tmp, tmp, r3); - tst_32(tmp, tmp); -#else orrs(tmp, tmp, r3); -#endif b(L, mi); } void add_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) { -#ifdef AARCH64 - add(dst, r1, r2, ex_sxtw, shift); -#else add(dst, r1, AsmOperand(r2, lsl, shift)); -#endif } void sub_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) { -#ifdef AARCH64 - sub(dst, r1, r2, ex_sxtw, shift); -#else sub(dst, r1, AsmOperand(r2, lsl, shift)); -#endif } // klass oop manipulations if compressed -#ifdef AARCH64 - void load_klass(Register dst_klass, Register src_oop); -#else void load_klass(Register dst_klass, Register src_oop, AsmCondition cond = al); -#endif // AARCH64 void store_klass(Register src_klass, Register dst_oop); -#ifdef AARCH64 - void store_klass_gap(Register dst); -#endif // AARCH64 // oop manipulations @@ -1058,39 +869,6 @@ void access_load_at(BasicType type, DecoratorSet decorators, Address src, Register dst, Register tmp1, Register tmp2, Register tmp3); void access_store_at(BasicType type, DecoratorSet decorators, Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null); -#ifdef AARCH64 - void encode_heap_oop(Register dst, Register src); - void encode_heap_oop(Register r) { - encode_heap_oop(r, r); - } - void decode_heap_oop(Register dst, Register src); - void decode_heap_oop(Register r) { - decode_heap_oop(r, r); - } - -#ifdef COMPILER2 - void encode_heap_oop_not_null(Register dst, Register src); - void decode_heap_oop_not_null(Register dst, Register src); - - void set_narrow_klass(Register dst, Klass* k); - void set_narrow_oop(Register dst, jobject obj); -#endif - - void encode_klass_not_null(Register r); - void encode_klass_not_null(Register dst, Register src); - void decode_klass_not_null(Register r); - void decode_klass_not_null(Register dst, Register src); - - void reinit_heapbase(); - -#ifdef ASSERT - void verify_heapbase(const char* msg); -#endif // ASSERT - - static int instr_count_for_mov_slow(intptr_t c); - static int instr_count_for_mov_slow(address addr); - static int instr_count_for_decode_klass_not_null(); -#endif // AARCH64 void ldr_global_ptr(Register reg, address address_of_global); void ldr_global_s32(Register reg, address address_of_global); @@ -1106,12 +884,7 @@ assert ((offset() & (wordSize-1)) == 0, "should 
be aligned by word size"); -#ifdef AARCH64 - emit_int32(address_placeholder_instruction); - emit_int32(address_placeholder_instruction); -#else AbstractAssembler::emit_address((address)address_placeholder_instruction); -#endif } void b(address target, AsmCondition cond = al) { @@ -1122,15 +895,14 @@ Assembler::b(target(L), cond); } - void bl(address target NOT_AARCH64_ARG(AsmCondition cond = al)) { - Assembler::bl(target NOT_AARCH64_ARG(cond)); + void bl(address target, AsmCondition cond = al) { + Assembler::bl(target, cond); } - void bl(Label& L NOT_AARCH64_ARG(AsmCondition cond = al)) { + void bl(Label& L, AsmCondition cond = al) { // internal calls - Assembler::bl(target(L) NOT_AARCH64_ARG(cond)); + Assembler::bl(target(L), cond); } -#ifndef AARCH64 void adr(Register dest, Label& L, AsmCondition cond = al) { int delta = target(L) - pc() - 8; if (delta >= 0) { @@ -1139,7 +911,6 @@ sub(dest, PC, -delta, cond); } } -#endif // !AARCH64 // Variable-length jump and calls. We now distinguish only the // patchable case from the other cases. Patchable must be @@ -1163,30 +934,23 @@ // specified to allow future optimizations. void jump(address target, relocInfo::relocType rtype = relocInfo::runtime_call_type, - Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg) -#ifndef AARCH64 - , AsmCondition cond = al -#endif - ); + Register scratch = noreg, AsmCondition cond = al); void call(address target, - RelocationHolder rspec - NOT_AARCH64_ARG(AsmCondition cond = al)); + RelocationHolder rspec, AsmCondition cond = al); void call(address target, - relocInfo::relocType rtype = relocInfo::runtime_call_type - NOT_AARCH64_ARG(AsmCondition cond = al)) { - call(target, Relocation::spec_simple(rtype) NOT_AARCH64_ARG(cond)); + relocInfo::relocType rtype = relocInfo::runtime_call_type, + AsmCondition cond = al) { + call(target, Relocation::spec_simple(rtype), cond); } void jump(AddressLiteral dest) { jump(dest.target(), dest.reloc()); } -#ifndef AARCH64 void jump(address dest, relocInfo::relocType rtype, AsmCondition cond) { jump(dest, rtype, Rtemp, cond); } -#endif void call(AddressLiteral dest) { call(dest.target(), dest.reloc()); @@ -1204,10 +968,7 @@ // specified to allow future optimizations. 
void patchable_jump(address target, relocInfo::relocType rtype = relocInfo::runtime_call_type, - Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg) -#ifndef AARCH64 - , AsmCondition cond = al -#endif + Register scratch = noreg, AsmCondition cond = al ); // patchable_call may scratch Rtemp @@ -1221,13 +982,7 @@ return patchable_call(target, Relocation::spec_simple(rtype), c2); } -#if defined(AARCH64) && defined(COMPILER2) - static int call_size(address target, bool far, bool patchable); -#endif -#ifdef AARCH64 - static bool page_reachable_from_cache(address target); -#endif static bool _reachable_from_cache(address target); static bool _cache_fully_reachable(); bool cache_fully_reachable(); @@ -1237,15 +992,8 @@ void sign_extend(Register rd, Register rn, int bits); inline void zap_high_non_significant_bits(Register r) { -#ifdef AARCH64 - if(ZapHighNonSignificantBits) { - movk(r, 0xBAAD, 48); - movk(r, 0xF00D, 32); - } -#endif } -#ifndef AARCH64 void long_move(Register rd_lo, Register rd_hi, Register rn_lo, Register rn_hi, AsmCondition cond = al); @@ -1259,7 +1007,6 @@ void atomic_cas(Register tmpreg1, Register tmpreg2, Register oldval, Register newval, Register base, int offset); void atomic_cas_bool(Register oldval, Register newval, Register base, int offset, Register tmpreg); void atomic_cas64(Register temp_lo, Register temp_hi, Register temp_result, Register oldval_lo, Register oldval_hi, Register newval_lo, Register newval_hi, Register base, int offset); -#endif // !AARCH64 void cas_for_lock_acquire(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); void cas_for_lock_release(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); @@ -1282,14 +1029,9 @@ // size must not exceed wordSize (i.e. 8-byte values are not supported on 32-bit ARM); // each of these calls generates exactly one load or store instruction, // so src can be pre- or post-indexed address. -#ifdef AARCH64 - void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); - void store_sized_value(Register src, Address dst, size_t size_in_bytes); -#else // 32-bit ARM variants also support conditional execution void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, AsmCondition cond = al); void store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond = al); -#endif void lookup_interface_method(Register recv_klass, Register intf_klass, @@ -1311,11 +1053,7 @@ void ldr_literal(Register rd, AddressLiteral addr) { relocate(addr.rspec()); -#ifdef AARCH64 - ldr(rd, addr.target()); -#else ldr(rd, Address(PC, addr.target() - pc() - 8)); -#endif } void lea(Register Rd, AddressLiteral addr) { @@ -1326,46 +1064,10 @@ void restore_default_fp_mode(); #ifdef COMPILER2 -#ifdef AARCH64 - // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. 
- void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3); - void fast_unlock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3); -#else void fast_lock(Register obj, Register box, Register scratch, Register scratch2); void fast_unlock(Register obj, Register box, Register scratch, Register scratch2); #endif -#endif - -#ifdef AARCH64 - -#define F(mnemonic) \ - void mnemonic(Register rt, address target) { \ - Assembler::mnemonic(rt, target); \ - } \ - void mnemonic(Register rt, Label& L) { \ - Assembler::mnemonic(rt, target(L)); \ - } - - F(cbz_w); - F(cbnz_w); - F(cbz); - F(cbnz); - -#undef F - -#define F(mnemonic) \ - void mnemonic(Register rt, int bit, address target) { \ - Assembler::mnemonic(rt, bit, target); \ - } \ - void mnemonic(Register rt, int bit, Label& L) { \ - Assembler::mnemonic(rt, bit, target(L)); \ - } - - F(tbz); - F(tbnz); -#undef F -#endif // AARCH64 }; --- old/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp 2018-09-17 10:30:23.639992399 -0400 +++ new/src/hotspot/cpu/arm/macroAssembler_arm.inline.hpp 2018-09-17 10:30:23.006955425 -0400 @@ -32,46 +32,9 @@ inline void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { int instr = *(int*)branch; - int new_offset = (int)(target - branch NOT_AARCH64(- 8)); + int new_offset = (int)(target - branch - 8); assert((new_offset & 3) == 0, "bad alignment"); -#ifdef AARCH64 - if ((instr & (0x1f << 26)) == (0b00101 << 26)) { - // Unconditional B or BL - assert (is_offset_in_range(new_offset, 26), "offset is too large"); - *(int*)branch = (instr & ~right_n_bits(26)) | encode_offset(new_offset, 26, 0); - } else if ((instr & (0xff << 24)) == (0b01010100 << 24) && (instr & (1 << 4)) == 0) { - // Conditional B - assert (is_offset_in_range(new_offset, 19), "offset is too large"); - *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); - } else if ((instr & (0b111111 << 25)) == (0b011010 << 25)) { - // Compare & branch CBZ/CBNZ - assert (is_offset_in_range(new_offset, 19), "offset is too large"); - *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); - } else if ((instr & (0b111111 << 25)) == (0b011011 << 25)) { - // Test & branch TBZ/TBNZ - assert (is_offset_in_range(new_offset, 14), "offset is too large"); - *(int*)branch = (instr & ~(right_n_bits(14) << 5)) | encode_offset(new_offset, 14, 5); - } else if ((instr & (0b111011 << 24)) == (0b011000 << 24)) { - // LDR (literal) - unsigned opc = ((unsigned)instr >> 30); - assert (opc != 0b01 || ((uintx)target & 7) == 0, "ldr target should be aligned"); - assert (is_offset_in_range(new_offset, 19), "offset is too large"); - *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); - } else if (((instr & (1 << 31)) == 0) && ((instr & (0b11111 << 24)) == (0b10000 << 24))) { - // ADR - assert (is_imm_in_range(new_offset, 21, 0), "offset is too large"); - instr = (instr & ~(right_n_bits(2) << 29)) | (new_offset & 3) << 29; - *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_imm(new_offset >> 2, 19, 0, 5); - } else if((unsigned int)instr == address_placeholder_instruction) { - // address - assert (*(unsigned int *)(branch + InstructionSize) == address_placeholder_instruction, "address placeholder occupies two instructions"); - *(intx*)branch = (intx)target; - } else { - ::tty->print_cr("=============== instruction: 0x%x ================\n", instr); - Unimplemented(); // 
TODO-AARCH64 - } -#else if ((instr & 0x0e000000) == 0x0a000000) { // B or BL instruction assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint"); @@ -98,7 +61,6 @@ *(int*)branch = (instr & 0xff0ff000) | 1 << 20 | -new_offset; } } -#endif // AARCH64 } #endif // CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP --- old/src/hotspot/cpu/arm/methodHandles_arm.cpp 2018-09-17 10:30:25.207083929 -0400 +++ new/src/hotspot/cpu/arm/methodHandles_arm.cpp 2018-09-17 10:30:24.574046954 -0400 @@ -125,15 +125,8 @@ // compiled code in threads for which the event is enabled. Check here for // interp_only_mode if these events CAN be enabled. __ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); -#ifdef AARCH64 - Label L; - __ cbz(Rtemp, L); - __ indirect_jump(Address(Rmethod, Method::interpreter_entry_offset()), Rtemp); - __ bind(L); -#else __ cmp(Rtemp, 0); __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()), ne); -#endif // AARCH64 } const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : Method::from_interpreted_offset(); @@ -268,11 +261,7 @@ DEBUG_ONLY(rdx_param_size = noreg); } Register rbx_member = rbx_method; // MemberName ptr; incoming method ptr is dead now -#ifdef AARCH64 - __ ldr(rbx_member, Address(Rparams, Interpreter::stackElementSize, post_indexed)); -#else __ pop(rbx_member); -#endif generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry); } return entry_point; @@ -288,22 +277,15 @@ Register rbx_method = Rmethod; // eventual target of this invocation // temps used in this code are not used in *either* compiled or interpreted calling sequences Register temp1 = (for_compiler_entry ? saved_last_sp_register() : R1_tmp); - Register temp2 = AARCH64_ONLY(R9) NOT_AARCH64(R8); + Register temp2 = R8; Register temp3 = Rtemp; // R12/R16 - Register temp4 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R5); + Register temp4 = R5; if (for_compiler_entry) { assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -#ifdef AARCH64 - assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); - assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); - assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); - assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -#else assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3); assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3); assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3); assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); -#endif // AARCH64 } assert_different_registers(temp1, temp2, temp3, receiver_reg); assert_different_registers(temp1, temp2, temp3, temp4, member_reg); @@ -353,12 +335,7 @@ __ load_heap_oop(temp2_defc, member_clazz); load_klass_from_Class(_masm, temp2_defc, temp3, temp4); __ verify_klass_ptr(temp2_defc); -#ifdef AARCH64 - // TODO-AARCH64 - __ b(L_ok); -#else __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, noreg, L_ok); -#endif // If we get here, the type check failed! 
__ stop("receiver class disagrees with MemberName.clazz"); __ bind(L_ok); @@ -484,13 +461,9 @@ // the slop defends against false alarms due to fencepost errors }; -#ifdef AARCH64 -const int trace_mh_nregs = 32; // R0-R30, PC -#else const int trace_mh_nregs = 15; const Register trace_mh_regs[trace_mh_nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; -#endif // AARCH64 void trace_method_handle_stub(const char* adaptername, intptr_t* saved_regs, @@ -501,7 +474,7 @@ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH intptr_t* entry_sp = (intptr_t*) &saved_regs[trace_mh_nregs]; // just after the saved regs intptr_t* saved_sp = (intptr_t*) saved_regs[Rsender_sp->encoding()]; // save of Rsender_sp - intptr_t* last_sp = (intptr_t*) saved_bp[AARCH64_ONLY(frame::interpreter_frame_stack_top_offset) NOT_AARCH64(frame::interpreter_frame_last_sp_offset)]; + intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset]; intptr_t* base_sp = last_sp; intptr_t mh_reg = (intptr_t)saved_regs[R5_mh->encoding()]; @@ -517,13 +490,9 @@ tty->print(" reg dump: "); int i; for (i = 0; i < trace_mh_nregs; i++) { - if (i > 0 && i % AARCH64_ONLY(2) NOT_AARCH64(4) == 0) + if (i > 0 && i % 4 == 0) tty->print("\n + dump: "); -#ifdef AARCH64 - const char* reg_name = (i == trace_mh_nregs-1) ? "pc" : as_Register(i)->name(); -#else const char* reg_name = trace_mh_regs[i]->name(); -#endif tty->print(" %s: " INTPTR_FORMAT, reg_name, p2i((void *)saved_regs[i])); } tty->cr(); --- old/src/hotspot/cpu/arm/nativeInst_arm.hpp 2018-09-17 10:30:26.771175284 -0400 +++ new/src/hotspot/cpu/arm/nativeInst_arm.hpp 2018-09-17 10:30:26.142138543 -0400 @@ -30,11 +30,7 @@ #include "runtime/os.hpp" -#ifdef AARCH64 -#include "nativeInst_arm_64.hpp" -#else #include "nativeInst_arm_32.hpp" -#endif #endif // CPU_ARM_VM_NATIVEINST_ARM_HPP --- old/src/hotspot/cpu/arm/register_arm.cpp 2018-09-17 10:30:28.338266814 -0400 +++ new/src/hotspot/cpu/arm/register_arm.cpp 2018-09-17 10:30:27.705229839 -0400 @@ -32,12 +32,6 @@ const char* RegisterImpl::name() const { const char* names[number_of_registers] = { -#ifdef AARCH64 - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "fp", "lr", "xzr", "sp" -#else "r0", "r1", "r2", "r3", "r4", "r5", "r6", #if (FP_REG_NUM == 7) "fp", @@ -51,19 +45,12 @@ "r11", #endif "r12", "sp", "lr", "pc" -#endif // AARCH64 }; return is_valid() ? names[encoding()] : "noreg"; } const char* FloatRegisterImpl::name() const { const char* names[number_of_registers] = { -#ifdef AARCH64 - "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", - "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", - "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -#else "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", @@ -74,7 +61,6 @@ "s48", "s49?","s50", "s51?","s52", "s53?","s54", "s55?", "s56", "s57?","s58", "s59?","s60", "s61?","s62", "s63?" #endif -#endif // AARCH64 }; return is_valid() ? 
names[encoding()] : "fnoreg"; } --- old/src/hotspot/cpu/arm/register_arm.hpp 2018-09-17 10:30:29.907358460 -0400 +++ new/src/hotspot/cpu/arm/register_arm.hpp 2018-09-17 10:30:29.272321370 -0400 @@ -66,7 +66,6 @@ #define R9_IS_SCRATCHED 0 #endif -#ifndef AARCH64 // FP_REG_NUM // // The ARM ABI does not state which register is used for the frame pointer. @@ -77,7 +76,6 @@ // Default: FP is R11 #define FP_REG_NUM 11 #endif -#endif // AARCH64 // ALIGN_WIDE_ARGUMENTS // @@ -113,32 +111,6 @@ #define R14 ((Register)14) #define R15 ((Register)15) -#ifdef AARCH64 - -#define R16 ((Register)16) -#define R17 ((Register)17) -#define R18 ((Register)18) -#define R19 ((Register)19) -#define R20 ((Register)20) -#define R21 ((Register)21) -#define R22 ((Register)22) -#define R23 ((Register)23) -#define R24 ((Register)24) -#define R25 ((Register)25) -#define R26 ((Register)26) -#define R27 ((Register)27) -#define R28 ((Register)28) -#define R29 ((Register)29) -#define R30 ((Register)30) -#define ZR ((Register)31) -#define SP ((Register)32) - -#define FP R29 -#define LR R30 - -#define altFP_7_11 R7 - -#else // !AARCH64 #define FP ((Register)FP_REG_NUM) @@ -158,7 +130,6 @@ #define LR R14 #define PC R15 -#endif // !AARCH64 class RegisterImpl; @@ -171,11 +142,7 @@ class RegisterImpl : public AbstractRegisterImpl { public: enum { -#ifdef AARCH64 - number_of_gprs = 31, - zr_sp_encoding = 31, -#endif - number_of_registers = AARCH64_ONLY(number_of_gprs + 2) NOT_AARCH64(16) + number_of_registers = 16 }; Register successor() const { return as_Register(encoding() + 1); } @@ -188,19 +155,10 @@ int encoding() const { assert(is_valid(), "invalid register"); return value(); } const char* name() const; -#ifdef AARCH64 - int encoding_with_zr() const { assert (is_valid_gpr_or_zr(), "invalid register"); return (this == ZR) ? zr_sp_encoding : value(); } - int encoding_with_sp() const { assert (is_valid_gpr_or_sp(), "invalid register"); return (this == SP) ? 
zr_sp_encoding : value(); } -#endif // testers bool is_valid() const { return 0 <= value() && value() < number_of_registers; } -#ifdef AARCH64 - bool is_valid_gpr() const { return (0 <= value() && value() < number_of_gprs); } - bool is_valid_gpr_or_zr() const { return is_valid_gpr() || (this == ZR); } - bool is_valid_gpr_or_sp() const { return is_valid_gpr() || (this == SP); } -#endif }; CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); @@ -217,11 +175,7 @@ class FloatRegisterImpl : public AbstractRegisterImpl { public: enum { -#ifdef AARCH64 - number_of_registers = 32 -#else number_of_registers = NOT_COMPILER2(32) COMPILER2_PRESENT(64) -#endif }; inline friend FloatRegister as_FloatRegister(int encoding); @@ -234,7 +188,6 @@ const char* name() const; -#ifndef AARCH64 int hi_bits() const { return (encoding() >> 1) & 0xf; } @@ -246,54 +199,10 @@ int hi_bit() const { return encoding() >> 5; } -#endif // !AARCH64 }; CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1)); -#ifdef AARCH64 - -CONSTANT_REGISTER_DECLARATION(FloatRegister, V0, ( 0)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V1, ( 1)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V2, ( 2)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V3, ( 3)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V4, ( 4)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V5, ( 5)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V6, ( 6)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V7, ( 7)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V8, ( 8)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V9, ( 9)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V10, (10)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V11, (11)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V12, (12)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V13, (13)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V14, (14)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V15, (15)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V16, (16)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V17, (17)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V18, (18)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V19, (19)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V20, (20)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V21, (21)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V22, (22)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V23, (23)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V24, (24)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V25, (25)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V26, (26)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V27, (27)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V28, (28)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V29, (29)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V30, (30)); -CONSTANT_REGISTER_DECLARATION(FloatRegister, V31, (31)); - -#define S0 V0 -#define S1_reg V1 -#define Stemp V31 - -#define D0 V0 -#define D1 V1 - -#else // AARCH64 /* * S1-S6 are named with "_reg" suffix to avoid conflict with @@ -366,16 +275,15 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, D30, (60)); CONSTANT_REGISTER_DECLARATION(FloatRegister, D31, (62)); -#endif // AARCH64 class ConcreteRegisterImpl : public AbstractRegisterImpl { public: enum { log_vmregs_per_word = LogBytesPerWord - LogBytesPerInt, // VMRegs are of 4-byte size #ifdef COMPILER2 - log_bytes_per_fpr = AARCH64_ONLY(4) NOT_AARCH64(2), // quad vectors + log_bytes_per_fpr = 2, // quad vectors #else - log_bytes_per_fpr = AARCH64_ONLY(3) NOT_AARCH64(2), // double vectors + 
log_bytes_per_fpr = 2, // double vectors #endif log_words_per_fpr = log_bytes_per_fpr - LogBytesPerWord, words_per_fpr = 1 << log_words_per_fpr, @@ -388,17 +296,13 @@ max_gpr0 = num_gpr, num_fpr = FloatRegisterImpl::number_of_registers << log_vmregs_per_fpr, max_fpr0 = max_gpr0 + num_fpr, - number_of_registers = num_gpr + num_fpr + - // TODO-AARCH64 revise - 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers + number_of_registers = num_gpr + num_fpr + 1+1 // APSR and FPSCR so that c2's REG_COUNT <= ConcreteRegisterImpl::number_of_registers }; static const int max_gpr; static const int max_fpr; }; -// TODO-AARCH64 revise the following definitions - class VFPSystemRegisterImpl; typedef VFPSystemRegisterImpl* VFPSystemRegister; class VFPSystemRegisterImpl : public AbstractRegisterImpl { @@ -414,33 +318,21 @@ /* * Register definitions shared across interpreter and compiler */ -#define Rexception_obj AARCH64_ONLY(R19) NOT_AARCH64(R4) -#define Rexception_pc AARCH64_ONLY(R20) NOT_AARCH64(R5) - -#ifdef AARCH64 -#define Rheap_base R27 -#endif // AARCH64 +#define Rexception_obj R4 +#define Rexception_pc R5 /* * Interpreter register definitions common to C++ and template interpreters. */ -#ifdef AARCH64 -#define Rlocals R23 -#define Rmethod R26 -#define Rthread R28 -#define Rtemp R16 -#define Rtemp2 R17 -#else #define Rlocals R8 #define Rmethod R9 #define Rthread R10 #define Rtemp R12 -#endif // AARCH64 // Interpreter calling conventions -#define Rparams AARCH64_ONLY(R8) NOT_AARCH64(SP) -#define Rsender_sp AARCH64_ONLY(R19) NOT_AARCH64(R4) +#define Rparams SP +#define Rsender_sp R4 // JSR292 // Note: R5_mh is needed only during the call setup, including adapters @@ -479,25 +371,23 @@ #define D1_tmp D1 // Temporary registers saved across VM calls (according to C calling conventions) -#define Rtmp_save0 AARCH64_ONLY(R19) NOT_AARCH64(R4) -#define Rtmp_save1 AARCH64_ONLY(R20) NOT_AARCH64(R5) +#define Rtmp_save0 R4 +#define Rtmp_save1 R5 // Cached TOS value #define R0_tos R0 -#ifndef AARCH64 #define R0_tos_lo R0 #define R1_tos_hi R1 -#endif #define S0_tos S0 #define D0_tos D0 // Dispatch table -#define RdispatchTable AARCH64_ONLY(R22) NOT_AARCH64(R6) +#define RdispatchTable R6 // Bytecode pointer -#define Rbcp AARCH64_ONLY(R24) NOT_AARCH64(altFP_7_11) +#define Rbcp altFP_7_11 // Pre-loaded next bytecode for the dispatch #define R3_bytecode R3 @@ -507,7 +397,7 @@ #define R4_ArrayIndexOutOfBounds_index R4 // Interpreter expression stack top -#define Rstack_top AARCH64_ONLY(R25) NOT_AARCH64(SP) +#define Rstack_top SP /* * Linux 32-bit ARM C ABI Register calling conventions @@ -529,28 +419,14 @@ * R13 (SP) Stack Pointer callee * R14 (LR) Link register * R15 (PC) Program Counter - * - * TODO-AARCH64: document AArch64 ABI - * */ #define c_rarg0 R0 #define c_rarg1 R1 #define c_rarg2 R2 #define c_rarg3 R3 -#ifdef AARCH64 -#define c_rarg4 R4 -#define c_rarg5 R5 -#define c_rarg6 R6 -#define c_rarg7 R7 -#endif -#ifdef AARCH64 -#define GPR_PARAMS 8 -#define FPR_PARAMS 8 -#else #define GPR_PARAMS 4 -#endif // Java ABI @@ -560,11 +436,5 @@ #define j_rarg2 c_rarg2 #define j_rarg3 c_rarg3 -#ifdef AARCH64 -#define j_rarg4 c_rarg4 -#define j_rarg5 c_rarg5 -#define j_rarg6 c_rarg6 -#define j_rarg7 c_rarg7 -#endif #endif // CPU_ARM_VM_REGISTER_ARM_HPP --- old/src/hotspot/cpu/arm/register_definitions_arm.cpp 2018-09-17 10:30:31.476450107 -0400 +++ new/src/hotspot/cpu/arm/register_definitions_arm.cpp 2018-09-17 10:30:30.844413192 -0400 @@ -31,42 +31,6 @@ REGISTER_DEFINITION(Register, 
noreg); REGISTER_DEFINITION(FloatRegister, fnoreg); -#ifdef AARCH64 - -REGISTER_DEFINITION(FloatRegister, V0); -REGISTER_DEFINITION(FloatRegister, V1); -REGISTER_DEFINITION(FloatRegister, V2); -REGISTER_DEFINITION(FloatRegister, V3); -REGISTER_DEFINITION(FloatRegister, V4); -REGISTER_DEFINITION(FloatRegister, V5); -REGISTER_DEFINITION(FloatRegister, V6); -REGISTER_DEFINITION(FloatRegister, V7); -REGISTER_DEFINITION(FloatRegister, V8); -REGISTER_DEFINITION(FloatRegister, V9); -REGISTER_DEFINITION(FloatRegister, V10); -REGISTER_DEFINITION(FloatRegister, V11); -REGISTER_DEFINITION(FloatRegister, V12); -REGISTER_DEFINITION(FloatRegister, V13); -REGISTER_DEFINITION(FloatRegister, V14); -REGISTER_DEFINITION(FloatRegister, V15); -REGISTER_DEFINITION(FloatRegister, V16); -REGISTER_DEFINITION(FloatRegister, V17); -REGISTER_DEFINITION(FloatRegister, V18); -REGISTER_DEFINITION(FloatRegister, V19); -REGISTER_DEFINITION(FloatRegister, V20); -REGISTER_DEFINITION(FloatRegister, V21); -REGISTER_DEFINITION(FloatRegister, V22); -REGISTER_DEFINITION(FloatRegister, V23); -REGISTER_DEFINITION(FloatRegister, V24); -REGISTER_DEFINITION(FloatRegister, V25); -REGISTER_DEFINITION(FloatRegister, V26); -REGISTER_DEFINITION(FloatRegister, V27); -REGISTER_DEFINITION(FloatRegister, V28); -REGISTER_DEFINITION(FloatRegister, V29); -REGISTER_DEFINITION(FloatRegister, V30); -REGISTER_DEFINITION(FloatRegister, V31); - -#else // AARCH64 REGISTER_DEFINITION(FloatRegister, S0); REGISTER_DEFINITION(FloatRegister, S1_reg); @@ -134,4 +98,3 @@ REGISTER_DEFINITION(FloatRegister, D30); REGISTER_DEFINITION(FloatRegister, D31); -#endif //AARCH64 --- old/src/hotspot/cpu/arm/relocInfo_arm.cpp 2018-09-17 10:30:33.041541521 -0400 +++ new/src/hotspot/cpu/arm/relocInfo_arm.cpp 2018-09-17 10:30:32.406504430 -0400 @@ -35,21 +35,6 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { NativeMovConstReg* ni = nativeMovConstReg_at(addr()); -#if defined(AARCH64) && defined(COMPILER2) - if (ni->is_movz()) { - assert(type() == relocInfo::oop_type, "!"); - if (verify_only) { - uintptr_t d = ni->data(); - guarantee((d >> 32) == 0, "not narrow oop"); - narrowOop no = d; - oop o = CompressedOops::decode(no); - guarantee(cast_from_oop(o) == (intptr_t)x, "instructions must match"); - } else { - ni->set_data((intptr_t)x); - } - return; - } -#endif if (verify_only) { guarantee(ni->data() == (intptr_t)(x + o), "instructions must match"); } else { @@ -69,21 +54,16 @@ RawNativeInstruction* ni = rawNativeInstruction_at(pc); -#if (!defined(AARCH64)) - if (NOT_AARCH64(ni->is_add_lr()) AARCH64_ONLY(ni->is_adr_aligned_lr())) { - // On arm32, skip the optional 'add LR, PC, #offset' + if (ni->is_add_lr()) { + // Skip the optional 'add LR, PC, #offset' // (Allowing the jump support code to handle fat_call) pc = ni->next_raw_instruction_address(); ni = nativeInstruction_at(pc); } -#endif - if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) { - // For arm32, fat_call are handled by is_jump for the new 'ni', + if (ni->is_bl()) { + // Fat_call are handled by is_jump for the new 'ni', // requiring only to support is_bl. - // - // For AARCH64, skipping a leading adr is not sufficient - // to reduce calls to a simple bl. 
return rawNativeCall_at(pc)->destination(adj); } @@ -98,21 +78,16 @@ address pc = addr(); NativeInstruction* ni = nativeInstruction_at(pc); -#if (!defined(AARCH64)) - if (NOT_AARCH64(ni->is_add_lr()) AARCH64_ONLY(ni->is_adr_aligned_lr())) { - // On arm32, skip the optional 'add LR, PC, #offset' + if (ni->is_add_lr()) { + // Skip the optional 'add LR, PC, #offset' // (Allowing the jump support code to handle fat_call) pc = ni->next_raw_instruction_address(); ni = nativeInstruction_at(pc); } -#endif - if (AARCH64_ONLY(ni->is_call()) NOT_AARCH64(ni->is_bl())) { - // For arm32, fat_call are handled by is_jump for the new 'ni', + if (ni->is_bl()) { + // Fat_call are handled by is_jump for the new 'ni', // requiring only to support is_bl. - // - // For AARCH64, skipping a leading adr is not sufficient - // to reduce calls to a simple bl. rawNativeCall_at(pc)->set_destination(x); return; } @@ -138,15 +113,6 @@ void metadata_Relocation::pd_fix_value(address x) { assert(! addr_in_const(), "Do not use"); -#ifdef AARCH64 -#ifdef COMPILER2 - NativeMovConstReg* ni = nativeMovConstReg_at(addr()); - if (ni->is_mov_slow()) { - return; - } -#endif - set_value(x); -#else if (!VM_Version::supports_movw()) { set_value(x); #ifdef ASSERT @@ -165,5 +131,4 @@ // assert(ni->data() == (int)x, "metadata relocation mismatch"); #endif } -#endif // !AARCH64 } --- old/src/hotspot/cpu/arm/runtime_arm.cpp 2018-09-17 10:30:34.617633576 -0400 +++ new/src/hotspot/cpu/arm/runtime_arm.cpp 2018-09-17 10:30:33.985596661 -0400 @@ -126,15 +126,8 @@ // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site. __ ldr(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset())); -#ifdef AARCH64 - Label skip; - __ cbz(Rtemp, skip); - __ mov(SP, Rmh_SP_save); - __ bind(skip); -#else __ cmp(Rtemp, 0); __ mov(SP, Rmh_SP_save, ne); -#endif // R0 contains handler address // Since this may be the deopt blob we must set R5 to look like we returned --- old/src/hotspot/cpu/arm/sharedRuntime_arm.cpp 2018-09-17 10:30:36.192725574 -0400 +++ new/src/hotspot/cpu/arm/sharedRuntime_arm.cpp 2018-09-17 10:30:35.545687782 -0400 @@ -62,46 +62,6 @@ // branch to the runtime. The slot at R14/R30_offset is for the value of LR // in case it's live in the method we are coming from. -#ifdef AARCH64 - - // - // On AArch64 registers save area has the following layout: - // - // |---------------------| - // | return address (LR) | - // | FP | - // |---------------------| - // | V31 | - // | ... | - // | V0 | - // |---------------------| - // | padding | - // | R30 (LR live value) | - // |---------------------| - // | R27 | - // | ... 
| - // | R0 | - // |---------------------| <-- SP - // - - enum RegisterLayout { - number_of_saved_gprs = 28, - number_of_saved_fprs = FloatRegisterImpl::number_of_registers, - words_per_fpr = ConcreteRegisterImpl::words_per_fpr, - - R0_offset = 0, - R30_offset = R0_offset + number_of_saved_gprs, - D0_offset = R30_offset + 2, - FP_offset = D0_offset + number_of_saved_fprs * words_per_fpr, - LR_offset = FP_offset + 1, - - reg_save_size = LR_offset + 1, - }; - - static const int Rmethod_offset; - static const int Rtemp_offset; - -#else enum RegisterLayout { fpu_save_size = FloatRegisterImpl::number_of_registers, @@ -139,7 +99,6 @@ // (altFP_7_11 is the one amoung R7 and R11 which is not FP) #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11) -#endif // AARCH64 // When LR may be live in the nmethod from which we are comming // then lr_saved is true, the return address is saved before the @@ -154,10 +113,6 @@ }; -#ifdef AARCH64 -const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding(); -const int RegisterSaver::Rtemp_offset = RegisterSaver::R0_offset + Rtemp->encoding(); -#endif // AARCH64 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, @@ -168,47 +123,6 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0); -#ifdef AARCH64 - assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned"); - - if (lr_saved) { - // LR was stashed here, so that jump could use it as a scratch reg - __ ldr(LR, Address(SP, 0)); - // There are two words on the stack top: - // [SP + 0]: placeholder for FP - // [SP + wordSize]: saved return address - __ str(FP, Address(SP, 0)); - } else { - __ raw_push(FP, LR); - } - - __ sub(SP, SP, (reg_save_size - 2) * wordSize); - - for (int i = 0; i < number_of_saved_gprs; i += 2) { - int offset = R0_offset + i; - __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize)); - map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg()); - } - - __ str(R30, Address(SP, R30_offset * wordSize)); - map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg()); - - for (int i = 0; i < number_of_saved_fprs; i += 2) { - int offset1 = D0_offset + i * words_per_fpr; - int offset2 = offset1 + words_per_fpr; - Address base(SP, offset1 * wordSize); - if (words_per_fpr == 2) { - // pair of "wide" quad vector registers - __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base); - } else { - // pair of double vector registers - __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base); - } - map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg()); - } -#else if (lr_saved) { __ push(RegisterSet(FP)); } else { @@ -252,38 +166,11 @@ map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next()); } } -#endif // AARCH64 return map; } void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) { -#ifdef AARCH64 - for (int i = 0; i < number_of_saved_gprs; i += 2) { - __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize)); - } - - __ 
ldr(R30, Address(SP, R30_offset * wordSize)); - - for (int i = 0; i < number_of_saved_fprs; i += 2) { - Address base(SP, (D0_offset + i * words_per_fpr) * wordSize); - if (words_per_fpr == 2) { - // pair of "wide" quad vector registers - __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base); - } else { - // pair of double vector registers - __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base); - } - } - - __ add(SP, SP, (reg_save_size - 2) * wordSize); - - if (restore_lr) { - __ raw_pop(FP, LR); - } else { - __ ldr(FP, Address(SP, 0)); - } -#else if (HaveVFP) { __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback); if (VM_Version::has_vfp3_32()) { @@ -303,58 +190,8 @@ } else { __ pop(RegisterSet(FP)); } -#endif // AARCH64 -} - -#ifdef AARCH64 - -static void push_result_registers(MacroAssembler* masm, BasicType ret_type) { - if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { - __ str_d(D0, Address(SP, -2*wordSize, pre_indexed)); - } else { - __ raw_push(R0, ZR); - } -} - -static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) { - if (ret_type == T_DOUBLE || ret_type == T_FLOAT) { - __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed)); - } else { - __ raw_pop(R0, ZR); - } } -static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { - __ raw_push(R0, R1); - __ raw_push(R2, R3); - __ raw_push(R4, R5); - __ raw_push(R6, R7); - - assert(FPR_PARAMS == 8, "adjust this code"); - assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be"); - - if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed)); - if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed)); - if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed)); - if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed)); -} - -static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) { - assert(FPR_PARAMS == 8, "adjust this code"); - assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be"); - - if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed)); - if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed)); - if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed)); - if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed)); - - __ raw_pop(R6, R7); - __ raw_pop(R4, R5); - __ raw_pop(R2, R3); - __ raw_pop(R0, R1); -} - -#else // AARCH64 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) { #ifdef __ABI_HARD__ @@ -405,7 +242,6 @@ __ pop(RegisterSet(R0, R3)); } -#endif // AARCH64 // Is vector's size (in bytes) bigger than a size saved by default? 
@@ -429,73 +265,6 @@ VMRegPair *regs2, int total_args_passed) { assert(regs2 == NULL, "not needed on arm"); -#ifdef AARCH64 - int slot = 0; // counted in 32-bit VMReg slots - int reg = 0; - int fp_reg = 0; - for (int i = 0; i < total_args_passed; i++) { - switch (sig_bt[i]) { - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - case T_INT: - if (reg < GPR_PARAMS) { - Register r = as_Register(reg); - regs[i].set1(r->as_VMReg()); - reg++; - } else { - regs[i].set1(VMRegImpl::stack2reg(slot)); - slot+=2; - } - break; - case T_LONG: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - // fall through - case T_ARRAY: - case T_OBJECT: - case T_ADDRESS: - if (reg < GPR_PARAMS) { - Register r = as_Register(reg); - regs[i].set2(r->as_VMReg()); - reg++; - } else { - regs[i].set2(VMRegImpl::stack2reg(slot)); - slot+=2; - } - break; - case T_FLOAT: - if (fp_reg < FPR_PARAMS) { - FloatRegister r = as_FloatRegister(fp_reg); - regs[i].set1(r->as_VMReg()); - fp_reg++; - } else { - regs[i].set1(VMRegImpl::stack2reg(slot)); - slot+=2; - } - break; - case T_DOUBLE: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - if (fp_reg < FPR_PARAMS) { - FloatRegister r = as_FloatRegister(fp_reg); - regs[i].set2(r->as_VMReg()); - fp_reg++; - } else { - regs[i].set2(VMRegImpl::stack2reg(slot)); - slot+=2; - } - break; - case T_VOID: - assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); - regs[i].set_bad(); - break; - default: - ShouldNotReachHere(); - } - } - return slot; - -#else // AARCH64 int slot = 0; int ireg = 0; @@ -592,17 +361,12 @@ } } return slot; -#endif // AARCH64 } int SharedRuntime::java_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed, int is_outgoing) { -#ifdef AARCH64 - // C calling convention on AArch64 is good enough. - return c_calling_convention(sig_bt, regs, NULL, total_args_passed); -#else #ifdef __SOFTFP__ // soft float is the same as the C calling convention. return c_calling_convention(sig_bt, regs, NULL, total_args_passed); @@ -685,7 +449,6 @@ if (slot & 1) slot++; return slot; -#endif // AARCH64 } static void patch_callers_callsite(MacroAssembler *masm) { @@ -694,25 +457,15 @@ __ ldr(Rtemp, Address(Rmethod, Method::code_offset())); __ cbz(Rtemp, skip); -#ifdef AARCH64 - push_param_registers(masm, FPR_PARAMS); - __ raw_push(LR, ZR); -#else // Pushing an even number of registers for stack alignment. // Selecting R9, which had to be saved anyway for some platforms. 
__ push(RegisterSet(R0, R3) | R9 | LR); -#endif // AARCH64 __ mov(R0, Rmethod); __ mov(R1, LR); __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)); -#ifdef AARCH64 - __ raw_pop(LR, ZR); - pop_param_registers(masm, FPR_PARAMS); -#else __ pop(RegisterSet(R0, R3) | R9 | LR); -#endif // AARCH64 __ bind(skip); } @@ -739,57 +492,6 @@ Address callee_target_addr(Rthread, JavaThread::callee_target_offset()); __ str(Rmethod, callee_target_addr); -#ifdef AARCH64 - - assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod); - assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams); - - if (comp_args_on_stack) { - __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes)); - } - - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered"); - - int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1; - Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i)); - - VMReg r = regs[i].first(); - bool full_word = regs[i].second()->is_valid(); - - if (r->is_stack()) { - if (full_word) { - __ ldr(tmp, source_addr); - __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); - } else { - __ ldr_w(tmp, source_addr); - __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); - } - } else if (r->is_Register()) { - if (full_word) { - __ ldr(r->as_Register(), source_addr); - } else { - __ ldr_w(r->as_Register(), source_addr); - } - } else if (r->is_FloatRegister()) { - if (sig_bt[i] == T_DOUBLE) { - __ ldr_d(r->as_FloatRegister(), source_addr); - } else { - __ ldr_s(r->as_FloatRegister(), source_addr); - } - } else { - assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be"); - } - } - - __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset())); - __ br(tmp); - -#else assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod); @@ -851,7 +553,6 @@ __ ldr(Rmethod, callee_target_addr); __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); -#endif // AARCH64 } static void gen_c2i_adapter(MacroAssembler *masm, @@ -866,56 +567,6 @@ __ mov(Rsender_sp, SP); // not yet saved -#ifdef AARCH64 - - int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes); - if (extraspace) { - __ sub(SP, SP, extraspace); - } - - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 
2 : 1; - Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i)); - - VMReg r = regs[i].first(); - bool full_word = regs[i].second()->is_valid(); - - if (r->is_stack()) { - if (full_word) { - __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace)); - __ str(tmp, dest_addr); - } else { - __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace)); - __ str_w(tmp, dest_addr); - } - } else if (r->is_Register()) { - if (full_word) { - __ str(r->as_Register(), dest_addr); - } else { - __ str_w(r->as_Register(), dest_addr); - } - } else if (r->is_FloatRegister()) { - if (sig_bt[i] == T_DOUBLE) { - __ str_d(r->as_FloatRegister(), dest_addr); - } else { - __ str_s(r->as_FloatRegister(), dest_addr); - } - } else { - assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be"); - } - } - - __ mov(Rparams, SP); - - __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset())); - __ br(tmp); - -#else int extraspace = total_args_passed * Interpreter::stackElementSize; if (extraspace) { @@ -965,7 +616,6 @@ __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset())); -#endif // AARCH64 } AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, @@ -981,26 +631,17 @@ Label skip_fixup; const Register receiver = R0; const Register holder_klass = Rtemp; // XXX should be OK for C2 but not 100% sure - const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4); + const Register receiver_klass = R4; __ load_klass(receiver_klass, receiver); __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset())); __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset())); __ cmp(receiver_klass, holder_klass); -#ifdef AARCH64 - Label ic_miss; - __ b(ic_miss, ne); - __ ldr(Rtemp, Address(Rmethod, Method::code_offset())); - __ cbz(Rtemp, skip_fixup); - __ bind(ic_miss); - __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); -#else __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq); __ cmp(Rtemp, 0, eq); __ b(skip_fixup, eq); __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne); -#endif // AARCH64 address c2i_entry = __ pc(); gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); @@ -1201,10 +842,6 @@ __ bind(verified); int vep_offset = __ pc() - start; -#ifdef AARCH64 - // Extra nop for MT-safe patching in NativeJump::patch_verified_entry - __ nop(); -#endif // AARCH64 if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { // Object.hashCode, System.identityHashCode can pull the hashCode from the header word @@ -1217,15 +854,8 @@ if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) { assert(method->is_static(), "method should be static"); // return 0 for null reference input, return val = R0 = obj_reg = 0 -#ifdef AARCH64 - Label Continue; - __ cbnz(obj_reg, Continue); - __ ret(); - __ bind(Continue); -#else __ cmp(obj_reg, 0); __ bx(LR, eq); -#endif } __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); @@ -1238,16 +868,9 @@ __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case); } -#ifdef AARCH64 - __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place); - __ b(slow_case, eq); - __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift); - __ ret(); -#else __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place); __ 
mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne); __ bx(LR, ne); -#endif // AARCH64 __ bind(slow_case); } @@ -1279,12 +902,7 @@ assert(i != 0, "Incoming receiver is always in a register"); __ ldr(Rtemp, Address(FP, reg2offset_in(src))); __ cmp(Rtemp, 0); -#ifdef AARCH64 - __ add(Rtemp, FP, reg2offset_in(src)); - __ csel(Rtemp, ZR, Rtemp, eq); -#else __ add(Rtemp, FP, reg2offset_in(src), ne); -#endif // AARCH64 __ str(Rtemp, Address(SP, reg2offset_out(dst))); int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots(); map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); @@ -1297,14 +915,6 @@ } oop_handle_offset += VMRegImpl::slots_per_word; -#ifdef AARCH64 - __ cmp(src->as_Register(), 0); - __ add(Rtemp, SP, offset); - __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq); - if (dst->is_stack()) { - __ str(Rtemp, Address(SP, reg2offset_out(dst))); - } -#else if (dst->is_stack()) { __ movs(Rtemp, src->as_Register()); __ add(Rtemp, SP, offset, ne); @@ -1313,30 +923,12 @@ __ movs(dst->as_Register(), src->as_Register()); __ add(dst->as_Register(), SP, offset, ne); } -#endif // AARCH64 } } case T_VOID: break; -#ifdef AARCH64 - case T_FLOAT: - case T_DOUBLE: { - VMReg src = in_regs[i].first(); - VMReg dst = out_regs[i + extra_args].first(); - if (src->is_stack()) { - assert(dst->is_stack(), "must be"); - __ ldr(Rtemp, Address(FP, reg2offset_in(src))); - __ str(Rtemp, Address(SP, reg2offset_out(dst))); - } else { - assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be"); - assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be"); - fp_regs_in_arguments++; - } - break; - } -#else // AARCH64 #ifdef __SOFTFP__ case T_DOUBLE: @@ -1509,7 +1101,6 @@ break; } #endif // __ABI_HARD__ -#endif // AARCH64 default: { assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args"); @@ -1544,16 +1135,11 @@ assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin"); oop_maps->add_gc_map(pc_offset, map); -#ifndef AARCH64 // Order last_Java_pc store with the thread state transition (to _thread_in_native) __ membar(MacroAssembler::StoreStore, Rtemp); -#endif // !AARCH64 // RedefineClasses() tracing support for obsolete method entry if (log_is_enabled(Trace, redefine, class, obsolete)) { -#ifdef AARCH64 - __ NOT_TESTED(); -#endif __ save_caller_save_registers(); __ mov(R0, Rthread); __ mov_metadata(R1, method()); @@ -1561,10 +1147,10 @@ __ restore_caller_save_registers(); } - const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5); - const Register sync_obj = AARCH64_ONLY(R21) NOT_AARCH64(R6); - const Register disp_hdr = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11); - const Register tmp = AARCH64_ONLY(R23) NOT_AARCH64(R8); + const Register sync_handle = R5; + const Register sync_obj = R6; + const Register disp_hdr = altFP_7_11; + const Register tmp = R8; Label slow_lock, slow_lock_biased, lock_done, fast_lock; if (method->is_synchronized()) { @@ -1578,35 +1164,6 @@ } const Register mark = tmp; -#ifdef AARCH64 - __ sub(disp_hdr, FP, lock_slot_fp_offset); - assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); - - __ ldr(mark, sync_obj); - - // Test if object is already locked - assert(markOopDesc::unlocked_value == 1, "adjust this code"); - __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock); - - // Check for recursive lock - // See comments in InterpreterMacroAssembler::lock_object for - // explanations on the fast recursive locking check. 
- __ mov(Rtemp, SP); - __ sub(Rtemp, mark, Rtemp); - intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); - Assembler::LogicalImmediate imm(mask, false); - __ ands(Rtemp, Rtemp, imm); - __ b(slow_lock, ne); - - // Recursive locking: store 0 into a lock record - __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); - __ b(lock_done); - - __ bind(fast_lock); - __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); - - __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); -#else // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. // That would be acceptable as either CAS or slow case path is taken in that case @@ -1633,7 +1190,6 @@ __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); -#endif // AARCH64 __ bind(lock_done); } @@ -1643,13 +1199,7 @@ // Perform thread state transition __ mov(Rtemp, _thread_in_native); -#ifdef AARCH64 - // stlr instruction is used to force all preceding writes to be observed prior to thread state change - __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset())); - __ stlr_w(Rtemp, Rtemp2); -#else __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); -#endif // AARCH64 // Finally, call the native method __ call(method->native_function()); @@ -1709,37 +1259,10 @@ __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset())); __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM -#ifdef AARCH64 - __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes())); - if (CheckJNICalls) { - __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset())); - } - - - switch (ret_type) { - case T_BOOLEAN: - __ tst(R0, 0xff); - __ cset(R0, ne); - break; - case T_CHAR : __ zero_extend(R0, R0, 16); break; - case T_BYTE : __ sign_extend(R0, R0, 8); break; - case T_SHORT : __ sign_extend(R0, R0, 16); break; - case T_INT : // fall through - case T_LONG : // fall through - case T_VOID : // fall through - case T_FLOAT : // fall through - case T_DOUBLE : /* nothing to do */ break; - case T_OBJECT : // fall through - case T_ARRAY : break; // See JNIHandles::resolve below - default: - ShouldNotReachHere(); - } -#else __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes())); if (CheckJNICalls) { __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset())); } -#endif // AARCH64 // Unbox oop result, e.g. JNIHandles::resolve value in R0. if (ret_type == T_OBJECT || ret_type == T_ARRAY) { @@ -1752,23 +1275,12 @@ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); __ mov(SP, FP); -#ifdef AARCH64 - Label except; - __ cbnz(Rtemp, except); - __ raw_pop(FP, LR); - __ ret(); - - __ bind(except); - // Pop the frame and forward the exception. Rexception_pc contains return address. - __ raw_pop(FP, Rexception_pc); -#else __ cmp(Rtemp, 0); // Pop the frame and return if no exception pending __ pop(RegisterSet(FP) | RegisterSet(PC), eq); // Pop the frame and forward the exception. Rexception_pc contains return address. __ ldr(FP, Address(SP, wordSize, post_indexed), ne); __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne); -#endif // AARCH64 __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); // Safepoint operation and/or pending suspend request is in progress. 
@@ -1848,9 +1360,6 @@ // activation for use during deoptimization int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -#ifdef AARCH64 - extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord); -#endif // AARCH64 return extra_locals_size; } @@ -1863,11 +1372,7 @@ //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { ResourceMark rm; -#ifdef AARCH64 - CodeBuffer buffer("deopt_blob", 1024+256, 1); -#else CodeBuffer buffer("deopt_blob", 1024, 1024); -#endif int frame_size_in_words; OopMapSet* oop_maps; int reexecute_offset; @@ -1876,9 +1381,9 @@ MacroAssembler* masm = new MacroAssembler(&buffer); Label cont; - const Register Rkind = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit - const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6); - const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11); + const Register Rkind = R9; // caller-saved + const Register Rublock = R6; + const Register Rsender = altFP_7_11; assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp); address start = __ pc(); @@ -1968,9 +1473,7 @@ // This frame is going away. Fetch return value, so we can move it to // a new frame. __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); -#ifndef AARCH64 __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); -#endif // !AARCH64 #ifndef __SOFTFP__ __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); #endif @@ -1982,34 +1485,7 @@ __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); -#ifdef AARCH64 - // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller. - // They are needed for correct stack walking during stack overflow handling. - // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block). - __ sub(Rtemp, Rtemp, 2*wordSize); - __ add(SP, SP, Rtemp, ex_uxtx); - __ raw_pop(FP, LR); - -#ifdef ASSERT - { Label L; - __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - __ cmp(FP, Rtemp); - __ b(L, eq); - __ stop("FP restored from deoptimized frame does not match FP stored in unroll block"); - __ bind(L); - } - { Label L; - __ ldr(Rtemp, Address(R2)); - __ cmp(LR, Rtemp); - __ b(L, eq); - __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block"); - __ bind(L); - } -#endif // ASSERT - -#else __ add(SP, SP, Rtemp); -#endif // AARCH64 #ifdef ASSERT // Compilers generate code that bang the stack by as much as the @@ -2017,7 +1493,6 @@ // trigger a fault. Verify that it does not on non product builds. // See if it is enough stack to push deoptimized frames if (UseStackBanging) { -#ifndef AARCH64 // The compiled method that we are deoptimizing was popped from the stack. // If the stack bang results in a stack overflow, we don't return to the // method that is being deoptimized. The stack overflow exception is @@ -2025,14 +1500,12 @@ // from the caller in LR and restore FP. 
__ ldr(LR, Address(R2, 0)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); -#endif // !AARCH64 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ arm_stack_overflow_check(R8, Rtemp); } #endif __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); -#ifndef AARCH64 // Pick up the initial fp we should save // XXX Note: was ldr(FP, Address(FP)); @@ -2044,15 +1517,10 @@ // Deoptimization::fetch_unroll_info computes the right FP value and // stores it in Rublock.initial_info. This has been activated for ARM. __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); -#endif // !AARCH64 __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); __ mov(Rsender, SP); -#ifdef AARCH64 - __ sub(SP, SP, Rtemp, ex_uxtx); -#else __ sub(SP, SP, Rtemp); -#endif // AARCH64 // Push interpreter frames in a loop Label loop; @@ -2064,19 +1532,11 @@ __ mov(FP, SP); __ sub(Rtemp, Rtemp, 2*wordSize); -#ifdef AARCH64 - __ sub(SP, SP, Rtemp, ex_uxtx); -#else __ sub(SP, SP, Rtemp); -#endif // AARCH64 __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); -#ifdef AARCH64 - __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); -#else __ mov(LR, 0); __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 __ subs(R8, R8, 1); // decrement counter __ mov(Rsender, SP); @@ -2090,15 +1550,12 @@ // Restore frame locals after moving the frame __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); -#ifndef AARCH64 __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); -#endif // !AARCH64 #ifndef __SOFTFP__ __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize)); #endif // !__SOFTFP__ -#ifndef AARCH64 #ifdef ASSERT // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved) { Label L; @@ -2109,7 +1566,6 @@ __ bind(L); } #endif -#endif // Call unpack_frames with proper arguments __ mov(R0, Rthread); @@ -2126,9 +1582,7 @@ // Collect return values, pop self-frame and jump to interpreter __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize)); -#ifndef AARCH64 __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize)); -#endif // !AARCH64 // Interpreter floats controlled by __SOFTFP__, but compiler // float return value registers controlled by __ABI_HARD__ // This matters for vfp-sflt builds. @@ -2145,12 +1599,7 @@ #endif // !__SOFTFP__ __ mov(SP, FP); -#ifdef AARCH64 - __ raw_pop(FP, LR); - __ ret(); -#else __ pop(RegisterSet(FP) | RegisterSet(PC)); -#endif // AARCH64 __ flush(); @@ -2179,8 +1628,8 @@ #endif // bypassed when code generation useless MacroAssembler* masm = new MacroAssembler(&buffer); - const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6); - const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11); + const Register Rublock = R6; + const Register Rsender = altFP_7_11; assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp); // @@ -2236,34 +1685,7 @@ __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); -#ifdef AARCH64 - // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller. 
- // They are needed for correct stack walking during stack overflow handling. - // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block). - __ sub(Rtemp, Rtemp, 2*wordSize); - __ add(SP, SP, Rtemp, ex_uxtx); - __ raw_pop(FP, LR); - -#ifdef ASSERT - { Label L; - __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); - __ cmp(FP, Rtemp); - __ b(L, eq); - __ stop("FP restored from deoptimized frame does not match FP stored in unroll block"); - __ bind(L); - } - { Label L; - __ ldr(Rtemp, Address(R2)); - __ cmp(LR, Rtemp); - __ b(L, eq); - __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block"); - __ bind(L); - } -#endif // ASSERT - -#else __ add(SP, SP, Rtemp); -#endif //AARCH64 // See if it is enough stack to push deoptimized frames #ifdef ASSERT @@ -2271,7 +1693,6 @@ // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. if (UseStackBanging) { -#ifndef AARCH64 // The compiled method that we are deoptimizing was popped from the stack. // If the stack bang results in a stack overflow, we don't return to the // method that is being deoptimized. The stack overflow exception is @@ -2279,7 +1700,6 @@ // from the caller in LR and restore FP. __ ldr(LR, Address(R2, 0)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); -#endif // !AARCH64 __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); __ arm_stack_overflow_check(R8, Rtemp); } @@ -2287,15 +1707,9 @@ __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); __ mov(Rsender, SP); -#ifdef AARCH64 - __ sub(SP, SP, Rtemp, ex_uxtx); -#else __ sub(SP, SP, Rtemp); -#endif -#ifndef AARCH64 // __ ldr(FP, Address(FP)); __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); -#endif // AARCH64 // Push interpreter frames in a loop Label loop; @@ -2307,19 +1721,11 @@ __ mov(FP, SP); __ sub(Rtemp, Rtemp, 2*wordSize); -#ifdef AARCH64 - __ sub(SP, SP, Rtemp, ex_uxtx); -#else __ sub(SP, SP, Rtemp); -#endif // AARCH64 __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); -#ifdef AARCH64 - __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); -#else __ mov(LR, 0); __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 __ subs(R8, R8, 1); // decrement counter __ mov(Rsender, SP); __ b(loop, ne); @@ -2338,12 +1744,7 @@ __ reset_last_Java_frame(Rtemp); __ mov(SP, FP); -#ifdef AARCH64 - __ raw_pop(FP, LR); - __ ret(); -#else __ pop(RegisterSet(FP) | RegisterSet(PC)); -#endif masm->flush(); _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */); @@ -2372,12 +1773,8 @@ oop_maps = new OopMapSet(); if (!cause_return) { -#ifdef AARCH64 - __ raw_push(LR, LR); -#else __ sub(SP, SP, 4); // make room for LR which may still be live // here if we are coming from a c2 method -#endif // AARCH64 } OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return); @@ -2402,20 +1799,6 @@ __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); __ cmp(Rtemp, 0); -#ifdef AARCH64 - RegisterSaver::restore_live_registers(masm, cause_return); - Register ret_addr = cause_return ? 
LR : Rtemp; - if (!cause_return) { - __ raw_pop(FP, ret_addr); - } - - Label throw_exception; - __ b(throw_exception, ne); - __ br(ret_addr); - - __ bind(throw_exception); - __ mov(Rexception_pc, ret_addr); -#else // AARCH64 if (!cause_return) { RegisterSaver::restore_live_registers(masm, false); __ pop(PC, eq); @@ -2425,7 +1808,6 @@ __ bx(LR, eq); __ mov(Rexception_pc, LR); } -#endif // AARCH64 __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp); --- old/src/hotspot/cpu/arm/stubGenerator_arm.cpp 2018-09-17 10:30:37.793819090 -0400 +++ new/src/hotspot/cpu/arm/stubGenerator_arm.cpp 2018-09-17 10:30:37.160782116 -0400 @@ -85,21 +85,13 @@ // Hard coded choices (XXX: could be changed to a command line option) #define ArmCopyPlatform DEFAULT_ARRAYCOPY_CONFIG -#ifdef AARCH64 -#define ArmCopyCacheLineSize 64 -#else #define ArmCopyCacheLineSize 32 // not worth optimizing to 64 according to measured gains -#endif // AARCH64 - -// TODO-AARCH64: tune and revise AArch64 arraycopy optimizations // configuration for each kind of loop typedef struct { int pld_distance; // prefetch distance (0 => no prefetch, <0: prefetch_before); -#ifndef AARCH64 bool split_ldm; // if true, split each STM in STMs with fewer registers bool split_stm; // if true, split each LTM in LTMs with fewer registers -#endif // !AARCH64 } arraycopy_loop_config; // configuration for all loops @@ -114,14 +106,6 @@ // configured platforms static arraycopy_platform_config arraycopy_configurations[] = { // configuration parameters for arraycopy loops -#ifdef AARCH64 - { - {-256 }, // forward aligned - {-128 }, // backward aligned - {-256 }, // forward shifted - {-128 } // backward shifted - } -#else // Configurations were chosen based on manual analysis of benchmark // results, minimizing overhead with respect to best results on the @@ -171,7 +155,6 @@ {-160, false, false }, // forward shifted {-160, true, true } // backward shifted } -#endif // AARCH64 }; class StubGenerator: public StubCodeGenerator { @@ -190,100 +173,6 @@ StubCodeMark mark(this, "StubRoutines", "call_stub"); address start = __ pc(); -#ifdef AARCH64 - const int saved_regs_size = 192; - - __ stp(FP, LR, Address(SP, -saved_regs_size, pre_indexed)); - __ mov(FP, SP); - - int sp_offset = 16; - assert(frame::entry_frame_call_wrapper_offset * wordSize == sp_offset, "adjust this code"); - __ stp(R0, ZR, Address(SP, sp_offset)); sp_offset += 16; - - const int saved_result_and_result_type_offset = sp_offset; - __ stp(R1, R2, Address(SP, sp_offset)); sp_offset += 16; - __ stp(R19, R20, Address(SP, sp_offset)); sp_offset += 16; - __ stp(R21, R22, Address(SP, sp_offset)); sp_offset += 16; - __ stp(R23, R24, Address(SP, sp_offset)); sp_offset += 16; - __ stp(R25, R26, Address(SP, sp_offset)); sp_offset += 16; - __ stp(R27, R28, Address(SP, sp_offset)); sp_offset += 16; - - __ stp_d(V8, V9, Address(SP, sp_offset)); sp_offset += 16; - __ stp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16; - __ stp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16; - __ stp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16; - assert (sp_offset == saved_regs_size, "adjust this code"); - - __ mov(Rmethod, R3); - __ mov(Rthread, R7); - __ reinit_heapbase(); - - { // Pass parameters - Label done_parameters, pass_parameters; - - __ mov(Rparams, SP); - __ cbz_w(R6, done_parameters); - - __ sub(Rtemp, SP, R6, ex_uxtw, LogBytesPerWord); - __ align_reg(SP, Rtemp, StackAlignmentInBytes); - __ add(Rparams, SP, R6, ex_uxtw, LogBytesPerWord); - - __ 
bind(pass_parameters); - __ subs_w(R6, R6, 1); - __ ldr(Rtemp, Address(R5, wordSize, post_indexed)); - __ str(Rtemp, Address(Rparams, -wordSize, pre_indexed)); - __ b(pass_parameters, ne); - - __ bind(done_parameters); - -#ifdef ASSERT - { - Label L; - __ cmp(SP, Rparams); - __ b(L, eq); - __ stop("SP does not match Rparams"); - __ bind(L); - } -#endif - } - - __ mov(Rsender_sp, SP); - __ blr(R4); - return_address = __ pc(); - - __ mov(SP, FP); - - __ ldp(R1, R2, Address(SP, saved_result_and_result_type_offset)); - - { // Handle return value - Label cont; - __ str(R0, Address(R1)); - - __ cmp_w(R2, T_DOUBLE); - __ ccmp_w(R2, T_FLOAT, Assembler::flags_for_condition(eq), ne); - __ b(cont, ne); - - __ str_d(V0, Address(R1)); - __ bind(cont); - } - - sp_offset = saved_result_and_result_type_offset + 16; - __ ldp(R19, R20, Address(SP, sp_offset)); sp_offset += 16; - __ ldp(R21, R22, Address(SP, sp_offset)); sp_offset += 16; - __ ldp(R23, R24, Address(SP, sp_offset)); sp_offset += 16; - __ ldp(R25, R26, Address(SP, sp_offset)); sp_offset += 16; - __ ldp(R27, R28, Address(SP, sp_offset)); sp_offset += 16; - - __ ldp_d(V8, V9, Address(SP, sp_offset)); sp_offset += 16; - __ ldp_d(V10, V11, Address(SP, sp_offset)); sp_offset += 16; - __ ldp_d(V12, V13, Address(SP, sp_offset)); sp_offset += 16; - __ ldp_d(V14, V15, Address(SP, sp_offset)); sp_offset += 16; - assert (sp_offset == saved_regs_size, "adjust this code"); - - __ ldp(FP, LR, Address(SP, saved_regs_size, post_indexed)); - __ ret(); - -#else // AARCH64 assert(frame::entry_frame_call_wrapper_offset == 0, "adjust this code"); @@ -358,7 +247,6 @@ #endif __ pop(RegisterSet(FP) | RegisterSet(PC)); -#endif // AARCH64 return start; } @@ -406,7 +294,6 @@ } -#ifndef AARCH64 // Integer division shared routine // Input: @@ -795,7 +682,6 @@ } -#endif // AARCH64 #ifdef COMPILER2 // Support for uint StubRoutine::Arm::partial_subtype_check( Klass sub, Klass super ); @@ -883,12 +769,7 @@ // Return failure __ bind(L_fail); -#ifdef AARCH64 - // count_temp is 0, can't use ZR here - __ adds(R0, count_temp, 1); // sets the flags -#else __ movs(R0, 1); // sets the flags -#endif __ raw_pop(saved_set); __ ret(); } @@ -925,11 +806,7 @@ Label exit, error; InlinedAddress verify_oop_count((address) StubRoutines::verify_oop_count_addr()); -#ifdef AARCH64 - __ mrs(flags, Assembler::SysReg_NZCV); -#else __ mrs(Assembler::CPSR, flags); -#endif // AARCH64 __ ldr_literal(tmp1, verify_oop_count); __ ldr_s32(tmp2, Address(tmp1)); @@ -956,11 +833,7 @@ // return if everything seems ok __ bind(exit); -#ifdef AARCH64 - __ msr(Assembler::SysReg_NZCV, flags); -#else __ msr(Assembler::CPSR_f, flags); -#endif // AARCH64 __ ret(); @@ -1006,9 +879,7 @@ const Register to = R1; const Register count = R2; const Register to_from = tmp1; // to - from -#ifndef AARCH64 const Register byte_count = (log2_elem_size == 0) ? 
count : tmp2; // count << log2_elem_size -#endif // AARCH64 assert_different_registers(from, to, count, tmp1, tmp2); // no_overlap version works if 'to' lower (unsigned) than 'from' @@ -1016,114 +887,24 @@ BLOCK_COMMENT("Array Overlap Test:"); __ subs(to_from, to, from); -#ifndef AARCH64 if (log2_elem_size != 0) { __ mov(byte_count, AsmOperand(count, lsl, log2_elem_size)); } -#endif // !AARCH64 if (NOLp == NULL) __ b(no_overlap_target,lo); else __ b((*NOLp), lo); -#ifdef AARCH64 - __ subs(ZR, to_from, count, ex_sxtw, log2_elem_size); -#else __ cmp(to_from, byte_count); -#endif // AARCH64 if (NOLp == NULL) __ b(no_overlap_target, ge); else __ b((*NOLp), ge); } -#ifdef AARCH64 - // TODO-AARCH64: revise usages of bulk_* methods (probably ldp`s and stp`s should interlace) - - // Loads [from, from + count*wordSize) into regs[0], regs[1], ..., regs[count-1] - // and increases 'from' by count*wordSize. - void bulk_load_forward(Register from, const Register regs[], int count) { - assert (count > 0 && count % 2 == 0, "count must be positive even number"); - int bytes = count * wordSize; - - int offset = 0; - __ ldp(regs[0], regs[1], Address(from, bytes, post_indexed)); - offset += 2*wordSize; - - for (int i = 2; i < count; i += 2) { - __ ldp(regs[i], regs[i+1], Address(from, -bytes + offset)); - offset += 2*wordSize; - } - - assert (offset == bytes, "must be"); - } - - // Stores regs[0], regs[1], ..., regs[count-1] to [to, to + count*wordSize) - // and increases 'to' by count*wordSize. - void bulk_store_forward(Register to, const Register regs[], int count) { - assert (count > 0 && count % 2 == 0, "count must be positive even number"); - int bytes = count * wordSize; - int offset = 0; - __ stp(regs[0], regs[1], Address(to, bytes, post_indexed)); - offset += 2*wordSize; - - for (int i = 2; i < count; i += 2) { - __ stp(regs[i], regs[i+1], Address(to, -bytes + offset)); - offset += 2*wordSize; - } - - assert (offset == bytes, "must be"); - } - - // Loads [from - count*wordSize, from) into regs[0], regs[1], ..., regs[count-1] - // and decreases 'from' by count*wordSize. - // Note that the word with lowest address goes to regs[0]. - void bulk_load_backward(Register from, const Register regs[], int count) { - assert (count > 0 && count % 2 == 0, "count must be positive even number"); - int bytes = count * wordSize; - - int offset = 0; - - for (int i = count - 2; i > 0; i -= 2) { - offset += 2*wordSize; - __ ldp(regs[i], regs[i+1], Address(from, -offset)); - } - - offset += 2*wordSize; - __ ldp(regs[0], regs[1], Address(from, -bytes, pre_indexed)); - - assert (offset == bytes, "must be"); - } - - // Stores regs[0], regs[1], ..., regs[count-1] into [to - count*wordSize, to) - // and decreases 'to' by count*wordSize. - // Note that regs[0] value goes into the memory with lowest address. - void bulk_store_backward(Register to, const Register regs[], int count) { - assert (count > 0 && count % 2 == 0, "count must be positive even number"); - int bytes = count * wordSize; - - int offset = 0; - - for (int i = count - 2; i > 0; i -= 2) { - offset += 2*wordSize; - __ stp(regs[i], regs[i+1], Address(to, -offset)); - } - - offset += 2*wordSize; - __ stp(regs[0], regs[1], Address(to, -bytes, pre_indexed)); - - assert (offset == bytes, "must be"); - } -#endif // AARCH64 - - // TODO-AARCH64: rearrange in-loop prefetches: // probably we should choose between "prefetch-store before or after store", not "before or after load". 
void prefetch(Register from, Register to, int offset, int to_delta = 0) { __ prefetch_read(Address(from, offset)); -#ifdef AARCH64 - // Next line commented out to avoid significant loss of performance in memory copy - JDK-8078120 - // __ prfm(pstl1keep, Address(to, offset + to_delta)); -#endif // AARCH64 } // Generate the inner loop for forward aligned array copy @@ -1137,14 +918,14 @@ // Return the minimum initial value for count // // Notes: - // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64) + // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA) // - 'to' aligned on wordSize // - 'count' must be greater or equal than the returned value // // Increases 'from' and 'to' by count*bytes_per_count. // // Scratches 'count', R3. - // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored). + // R4-R10 are preserved (saved/restored). // int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) { assert (from == R0 && to == R1 && count == R2, "adjust the implementation below"); @@ -1154,7 +935,6 @@ int pld_offset = config->pld_distance; const int count_per_loop = bytes_per_loop / bytes_per_count; -#ifndef AARCH64 bool split_read= config->split_ldm; bool split_write= config->split_stm; @@ -1167,7 +947,6 @@ // BGE NEONCopyPLD __ push(RegisterSet(R4,R10)); -#endif // !AARCH64 const bool prefetch_before = pld_offset < 0; const bool prefetch_after = pld_offset > 0; @@ -1200,12 +979,7 @@ }; } -#ifdef AARCH64 - const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10}; -#endif // AARCH64 { - // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes - // 32-bit ARM note: we have tried implementing loop unrolling to skip one // PLD with 64 bytes cache line but the gain was not significant. 
@@ -1218,9 +992,6 @@ __ BIND(L_skip_pld); } -#ifdef AARCH64 - bulk_load_forward(from, data_regs, 8); -#else if (split_read) { // Split the register set in two sets so that there is less // latency between LDM and STM (R3-R6 available while R7-R10 @@ -1231,7 +1002,6 @@ } else { __ ldmia(from, RegisterSet(R3, R10), writeback); } -#endif // AARCH64 __ subs_32(count, count, count_per_loop); @@ -1239,16 +1009,12 @@ prefetch(from, to, pld_offset, bytes_per_loop); } -#ifdef AARCH64 - bulk_store_forward(to, data_regs, 8); -#else if (split_write) { __ stmia(to, RegisterSet(R3, R6), writeback); __ stmia(to, RegisterSet(R7, R10), writeback); } else { __ stmia(to, RegisterSet(R3, R10), writeback); } -#endif // AARCH64 __ b(L_copy_loop, ge); @@ -1264,70 +1030,6 @@ // __ add(count, count, ...); // addition useless for the bit tests assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); -#ifdef AARCH64 - assert (bytes_per_loop == 64, "adjust the code below"); - assert (bytes_per_count <= 8, "adjust the code below"); - - { - Label L; - __ tbz(count, exact_log2(32/bytes_per_count), L); - - bulk_load_forward(from, data_regs, 4); - bulk_store_forward(to, data_regs, 4); - - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(16/bytes_per_count), L); - - bulk_load_forward(from, data_regs, 2); - bulk_store_forward(to, data_regs, 2); - - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(8/bytes_per_count), L); - - __ ldr(R3, Address(from, 8, post_indexed)); - __ str(R3, Address(to, 8, post_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 4) { - Label L; - __ tbz(count, exact_log2(4/bytes_per_count), L); - - __ ldr_w(R3, Address(from, 4, post_indexed)); - __ str_w(R3, Address(to, 4, post_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 2) { - Label L; - __ tbz(count, exact_log2(2/bytes_per_count), L); - - __ ldrh(R3, Address(from, 2, post_indexed)); - __ strh(R3, Address(to, 2, post_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 1) { - Label L; - __ tbz(count, 0, L); - - __ ldrb(R3, Address(from, 1, post_indexed)); - __ strb(R3, Address(to, 1, post_indexed)); - - __ bind(L); - } -#else __ tst(count, 16 / bytes_per_count); __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes __ stmia(to, RegisterSet(R3, R6), writeback, ne); @@ -1355,7 +1057,6 @@ } __ pop(RegisterSet(R4,R10)); -#endif // AARCH64 return count_per_loop; } @@ -1372,14 +1073,14 @@ // Return the minimum initial value for count // // Notes: - // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64) + // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA) // - 'end_to' aligned on wordSize // - 'count' must be greater or equal than the returned value // // Decreases 'end_from' and 'end_to' by count*bytes_per_count. // // Scratches 'count', R3. - // On AArch64 also scratches R4-R10; on 32-bit ARM R4-R10 are preserved (saved/restored). + // ARM R4-R10 are preserved (saved/restored). 
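// --- Illustrative sketch (not HotSpot code) -------------------------------
// The conditional LDM/STM and LDR/STR pairs above finish the tail left over
// by the main loop: each power-of-two byte chunk is copied iff the matching
// bit of the remaining element count is set. Hedged C++ equivalent
// ('copy_tail' is an illustrative name).
#include <cstdint>

static void copy_tail(const uint8_t*& from, uint8_t*& to,
                      unsigned count, int bytes_per_count) {
  for (int chunk = 16; chunk >= 1; chunk >>= 1) {        // 16, 8, 4, 2, 1 bytes
    if (chunk < bytes_per_count) break;                  // smaller residues cannot occur
    if (count & (unsigned)(chunk / bytes_per_count)) {   // TST count, #(chunk/bytes_per_count)
      for (int i = 0; i < chunk; i++) *to++ = *from++;
    }
  }
}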
// int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) { assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below"); @@ -1390,14 +1091,12 @@ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_aligned; int pld_offset = config->pld_distance; -#ifndef AARCH64 bool split_read= config->split_ldm; bool split_write= config->split_stm; // See the forward copy variant for additional comments. __ push(RegisterSet(R4,R10)); -#endif // !AARCH64 __ sub_32(count, count, count_per_loop); @@ -1423,12 +1122,7 @@ }; } -#ifdef AARCH64 - const Register data_regs[8] = {R3, R4, R5, R6, R7, R8, R9, R10}; -#endif // AARCH64 { - // LDM (32-bit ARM) / LDP (AArch64) copy of 'bytes_per_loop' bytes - // 32-bit ARM note: we have tried implementing loop unrolling to skip one // PLD with 64 bytes cache line but the gain was not significant. @@ -1441,16 +1135,12 @@ __ BIND(L_skip_pld); } -#ifdef AARCH64 - bulk_load_backward(end_from, data_regs, 8); -#else if (split_read) { __ ldmdb(end_from, RegisterSet(R7, R10), writeback); __ ldmdb(end_from, RegisterSet(R3, R6), writeback); } else { __ ldmdb(end_from, RegisterSet(R3, R10), writeback); } -#endif // AARCH64 __ subs_32(count, count, count_per_loop); @@ -1458,16 +1148,12 @@ prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop); } -#ifdef AARCH64 - bulk_store_backward(end_to, data_regs, 8); -#else if (split_write) { __ stmdb(end_to, RegisterSet(R7, R10), writeback); __ stmdb(end_to, RegisterSet(R3, R6), writeback); } else { __ stmdb(end_to, RegisterSet(R3, R10), writeback); } -#endif // AARCH64 __ b(L_copy_loop, ge); @@ -1482,70 +1168,6 @@ // __ add(count, count, ...); // addition useless for the bit tests assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); -#ifdef AARCH64 - assert (bytes_per_loop == 64, "adjust the code below"); - assert (bytes_per_count <= 8, "adjust the code below"); - - { - Label L; - __ tbz(count, exact_log2(32/bytes_per_count), L); - - bulk_load_backward(end_from, data_regs, 4); - bulk_store_backward(end_to, data_regs, 4); - - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(16/bytes_per_count), L); - - bulk_load_backward(end_from, data_regs, 2); - bulk_store_backward(end_to, data_regs, 2); - - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(8/bytes_per_count), L); - - __ ldr(R3, Address(end_from, -8, pre_indexed)); - __ str(R3, Address(end_to, -8, pre_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 4) { - Label L; - __ tbz(count, exact_log2(4/bytes_per_count), L); - - __ ldr_w(R3, Address(end_from, -4, pre_indexed)); - __ str_w(R3, Address(end_to, -4, pre_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 2) { - Label L; - __ tbz(count, exact_log2(2/bytes_per_count), L); - - __ ldrh(R3, Address(end_from, -2, pre_indexed)); - __ strh(R3, Address(end_to, -2, pre_indexed)); - - __ bind(L); - } - - if (bytes_per_count <= 1) { - Label L; - __ tbz(count, 0, L); - - __ ldrb(R3, Address(end_from, -1, pre_indexed)); - __ strb(R3, Address(end_to, -1, pre_indexed)); - - __ bind(L); - } -#else __ tst(count, 16 / bytes_per_count); __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne); @@ -1573,7 +1195,6 @@ } __ pop(RegisterSet(R4,R10)); -#endif // AARCH64 return count_per_loop; } @@ -1581,7 +1202,7 @@ // Generate the inner loop for shifted forward array 
copy (unaligned copy). // It can be used when bytes_per_count < wordSize, i.e. - // byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64. + // byte/short copy // // Arguments // from: start src address, 64 bits aligned @@ -1594,11 +1215,11 @@ // Return the minimum initial value for count // // Notes: - // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64) + // - 'from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA) // - 'to' aligned on wordSize // - 'count' must be greater or equal than the returned value // - 'lsr_shift' + 'lsl_shift' = BitsPerWord - // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64 + // - 'bytes_per_count' is 1 or 2 // // Increases 'to' by count*bytes_per_count. // @@ -1622,10 +1243,8 @@ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].forward_shifted; int pld_offset = config->pld_distance; -#ifndef AARCH64 bool split_read= config->split_ldm; bool split_write= config->split_stm; -#endif // !AARCH64 const bool prefetch_before = pld_offset < 0; const bool prefetch_after = pld_offset > 0; @@ -1666,12 +1285,6 @@ __ b(L_last_read, lt); } -#ifdef AARCH64 - const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12}; - __ logical_shift_right(R3, R12, lsr_shift); // part of R12 not yet written - __ subs_32(count, count, count_per_loop); - bulk_load_forward(from, &data_regs[1], 8); -#else // read 32 bytes if (split_read) { // if write is not split, use less registers in first set to reduce locking @@ -1686,7 +1299,6 @@ __ ldmia(from, RegisterSet(R4, R10) | R12, writeback); // Note: small latency on R4 __ subs(count, count, count_per_loop); } -#endif // AARCH64 if (prefetch_after) { // do it after the 1st ldm/ldp anyway (no locking issues with early STM/STP) @@ -1701,12 +1313,10 @@ __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift)); __ logical_shift_right(R6, R6, lsr_shift); __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift)); -#ifndef AARCH64 if (split_write) { // write the first half as soon as possible to reduce stm locking __ stmia(to, RegisterSet(R3, R6), writeback, prefetch_before ? gt : ge); } -#endif // !AARCH64 __ logical_shift_right(R7, R7, lsr_shift); __ orr(R7, R7, AsmOperand(R8, lsl, lsl_shift)); __ logical_shift_right(R8, R8, lsr_shift); @@ -1716,23 +1326,17 @@ __ logical_shift_right(R10, R10, lsr_shift); __ orr(R10, R10, AsmOperand(R12, lsl, lsl_shift)); -#ifdef AARCH64 - bulk_store_forward(to, data_regs, 8); -#else if (split_write) { __ stmia(to, RegisterSet(R7, R10), writeback, prefetch_before ? gt : ge); } else { __ stmia(to, RegisterSet(R3, R10), writeback, prefetch_before ? 
gt : ge); } -#endif // AARCH64 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop) if (prefetch_before) { // the first loop may end earlier, allowing to skip pld at the end __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); -#ifndef AARCH64 __ stmia(to, RegisterSet(R3, R10), writeback); // stmia was skipped -#endif // !AARCH64 __ b(L_skip_pld, ge); __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop); } @@ -1740,90 +1344,6 @@ __ BIND(L_last_read); __ b(L_done, eq); -#ifdef AARCH64 - assert(bytes_per_count < 8, "adjust the code below"); - - __ logical_shift_right(R3, R12, lsr_shift); - - { - Label L; - __ tbz(count, exact_log2(32/bytes_per_count), L); - bulk_load_forward(from, &data_regs[1], 4); - __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift)); - __ logical_shift_right(R4, R4, lsr_shift); - __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift)); - __ logical_shift_right(R5, R5, lsr_shift); - __ orr(R5, R5, AsmOperand(R6, lsl, lsl_shift)); - __ logical_shift_right(R6, R6, lsr_shift); - __ orr(R6, R6, AsmOperand(R7, lsl, lsl_shift)); - bulk_store_forward(to, data_regs, 4); - __ logical_shift_right(R3, R7, lsr_shift); - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(16/bytes_per_count), L); - bulk_load_forward(from, &data_regs[1], 2); - __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift)); - __ logical_shift_right(R4, R4, lsr_shift); - __ orr(R4, R4, AsmOperand(R5, lsl, lsl_shift)); - bulk_store_forward(to, data_regs, 2); - __ logical_shift_right(R3, R5, lsr_shift); - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(8/bytes_per_count), L); - __ ldr(R4, Address(from, 8, post_indexed)); - __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift)); - __ str(R3, Address(to, 8, post_indexed)); - __ logical_shift_right(R3, R4, lsr_shift); - __ bind(L); - } - - const int have_bytes = lsl_shift/BitsPerByte; // number of already read bytes in R3 - - // It remains less than wordSize to write. - // Do not check count if R3 already has maximal number of loaded elements (one less than wordSize). - if (have_bytes < wordSize - bytes_per_count) { - Label L; - __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact - __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store? - __ b(L, le); - __ ldr(R4, Address(from, 8, post_indexed)); - __ orr(R3, R3, AsmOperand(R4, lsl, lsl_shift)); - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(4/bytes_per_count), L); - __ str_w(R3, Address(to, 4, post_indexed)); - if (bytes_per_count < 4) { - __ logical_shift_right(R3, R3, 4*BitsPerByte); - } - __ bind(L); - } - - if (bytes_per_count <= 2) { - Label L; - __ tbz(count, exact_log2(2/bytes_per_count), L); - __ strh(R3, Address(to, 2, post_indexed)); - if (bytes_per_count < 2) { - __ logical_shift_right(R3, R3, 2*BitsPerByte); - } - __ bind(L); - } - - if (bytes_per_count <= 1) { - Label L; - __ tbz(count, exact_log2(1/bytes_per_count), L); - __ strb(R3, Address(to, 1, post_indexed)); - __ bind(L); - } -#else switch (bytes_per_count) { case 2: __ mov(R3, AsmOperand(R12, lsr, lsr_shift)); @@ -1906,7 +1426,6 @@ __ strb(R3, Address(to, 1, post_indexed), ne); // one last byte break; } -#endif // AARCH64 __ BIND(L_done); return 0; // no minimum @@ -1914,7 +1433,7 @@ // Generate the inner loop for shifted backward array copy (unaligned copy). // It can be used when bytes_per_count < wordSize, i.e. 
- // byte/short copy on 32-bit ARM, byte/short/int/compressed-oop copy on AArch64. + // byte/short copy // // Arguments // end_from: end src address, 64 bits aligned @@ -1927,11 +1446,11 @@ // Return the minimum initial value for count // // Notes: - // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA, required for AArch64) + // - 'end_from' aligned on 64-bit (recommended for 32-bit ARM in case this speeds up LDMIA) // - 'end_to' aligned on wordSize // - 'count' must be greater or equal than the returned value // - 'lsr_shift' + 'lsl_shift' = 'BitsPerWord' - // - 'bytes_per_count' is 1 or 2 on 32-bit ARM; 1, 2 or 4 on AArch64 + // - 'bytes_per_count' is 1 or 2 on 32-bit ARM // // Decreases 'end_to' by count*bytes_per_count. // @@ -1955,10 +1474,8 @@ arraycopy_loop_config *config=&arraycopy_configurations[ArmCopyPlatform].backward_shifted; int pld_offset = config->pld_distance; -#ifndef AARCH64 bool split_read= config->split_ldm; bool split_write= config->split_stm; -#endif // !AARCH64 const bool prefetch_before = pld_offset < 0; @@ -2001,11 +1518,6 @@ __ b(L_last_read, lt); } -#ifdef AARCH64 - __ logical_shift_left(R12, R3, lsl_shift); - const Register data_regs[9] = {R3, R4, R5, R6, R7, R8, R9, R10, R12}; - bulk_load_backward(end_from, data_regs, 8); -#else if (split_read) { __ ldmdb(end_from, RegisterSet(R7, R10), writeback); __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written @@ -2014,7 +1526,6 @@ __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written __ ldmdb(end_from, RegisterSet(R3, R10), writeback); } -#endif // AARCH64 __ subs_32(count, count, count_per_loop); @@ -2034,35 +1545,27 @@ __ orr(R7, R7, AsmOperand(R6, lsr, lsr_shift)); __ logical_shift_left(R6, R6, lsl_shift); __ orr(R6, R6, AsmOperand(R5, lsr, lsr_shift)); -#ifndef AARCH64 if (split_write) { // store early to reduce locking issues __ stmdb(end_to, RegisterSet(R6, R10) | R12, writeback, prefetch_before ? gt : ge); } -#endif // !AARCH64 __ logical_shift_left(R5, R5, lsl_shift); __ orr(R5, R5, AsmOperand(R4, lsr, lsr_shift)); __ logical_shift_left(R4, R4, lsl_shift); __ orr(R4, R4, AsmOperand(R3, lsr, lsr_shift)); -#ifdef AARCH64 - bulk_store_backward(end_to, &data_regs[1], 8); -#else if (split_write) { __ stmdb(end_to, RegisterSet(R4, R5), writeback, prefetch_before ? gt : ge); } else { __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback, prefetch_before ? 
gt : ge); } -#endif // AARCH64 __ b(L_shifted_loop, gt); // no need to loop if 0 (when count need not be precise modulo bytes_per_loop) if (prefetch_before) { // the first loop may end earlier, allowing to skip pld at the end __ cmn_32(count, ((bytes_per_loop + pld_offset)/bytes_per_count)); -#ifndef AARCH64 __ stmdb(end_to, RegisterSet(R4, R10) | R12, writeback); // stmdb was skipped -#endif // !AARCH64 __ b(L_skip_pld, ge); __ adds_32(count, count, ((bytes_per_loop + pld_offset) / bytes_per_count) + count_per_loop); } @@ -2070,99 +1573,6 @@ __ BIND(L_last_read); __ b(L_done, eq); -#ifdef AARCH64 - assert(bytes_per_count < 8, "adjust the code below"); - - __ logical_shift_left(R12, R3, lsl_shift); - - { - Label L; - __ tbz(count, exact_log2(32/bytes_per_count), L); - bulk_load_backward(end_from, &data_regs[4], 4); - - __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift)); - __ logical_shift_left(R10, R10, lsl_shift); - __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift)); - __ logical_shift_left(R9, R9, lsl_shift); - __ orr(R9, R9, AsmOperand(R8, lsr, lsr_shift)); - __ logical_shift_left(R8, R8, lsl_shift); - __ orr(R8, R8, AsmOperand(R7, lsr, lsr_shift)); - - bulk_store_backward(end_to, &data_regs[5], 4); - __ logical_shift_left(R12, R7, lsl_shift); - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(16/bytes_per_count), L); - bulk_load_backward(end_from, &data_regs[6], 2); - - __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift)); - __ logical_shift_left(R10, R10, lsl_shift); - __ orr(R10, R10, AsmOperand(R9, lsr, lsr_shift)); - - bulk_store_backward(end_to, &data_regs[7], 2); - __ logical_shift_left(R12, R9, lsl_shift); - __ bind(L); - } - - { - Label L; - __ tbz(count, exact_log2(8/bytes_per_count), L); - __ ldr(R10, Address(end_from, -8, pre_indexed)); - __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift)); - __ str(R12, Address(end_to, -8, pre_indexed)); - __ logical_shift_left(R12, R10, lsl_shift); - __ bind(L); - } - - const int have_bytes = lsr_shift/BitsPerByte; // number of already read bytes in R12 - - // It remains less than wordSize to write. - // Do not check count if R12 already has maximal number of loaded elements (one less than wordSize). - if (have_bytes < wordSize - bytes_per_count) { - Label L; - __ andr(count, count, (uintx)(8/bytes_per_count-1)); // make count exact - __ cmp_32(count, have_bytes/bytes_per_count); // do we have enough bytes to store? 
- __ b(L, le); - __ ldr(R10, Address(end_from, -8, pre_indexed)); - __ orr(R12, R12, AsmOperand(R10, lsr, lsr_shift)); - __ bind(L); - } - - assert (bytes_per_count <= 4, "must be"); - - { - Label L; - __ tbz(count, exact_log2(4/bytes_per_count), L); - __ logical_shift_right(R9, R12, (wordSize-4)*BitsPerByte); - __ str_w(R9, Address(end_to, -4, pre_indexed)); // Write 4 MSB - if (bytes_per_count < 4) { - __ logical_shift_left(R12, R12, 4*BitsPerByte); // Promote remaining bytes to MSB - } - __ bind(L); - } - - if (bytes_per_count <= 2) { - Label L; - __ tbz(count, exact_log2(2/bytes_per_count), L); - __ logical_shift_right(R9, R12, (wordSize-2)*BitsPerByte); - __ strh(R9, Address(end_to, -2, pre_indexed)); // Write 2 MSB - if (bytes_per_count < 2) { - __ logical_shift_left(R12, R12, 2*BitsPerByte); // Promote remaining bytes to MSB - } - __ bind(L); - } - - if (bytes_per_count <= 1) { - Label L; - __ tbz(count, exact_log2(1/bytes_per_count), L); - __ logical_shift_right(R9, R12, (wordSize-1)*BitsPerByte); - __ strb(R9, Address(end_to, -1, pre_indexed)); // Write 1 MSB - __ bind(L); - } -#else switch(bytes_per_count) { case 2: __ mov(R12, AsmOperand(R3, lsl, lsl_shift)); // part of R3 not yet written @@ -2246,7 +1656,6 @@ break; } -#endif // AARCH64 __ BIND(L_done); return 0; // no minimum @@ -2261,27 +1670,6 @@ } } -#ifdef AARCH64 - // Loads one 'size_in_bytes'-sized value from 'from' in given direction, i.e. - // if forward: loads value at from and increases from by size - // if !forward: loads value at from-size_in_bytes and decreases from by size - void load_one(Register rd, Register from, int size_in_bytes, bool forward) { - assert_different_registers(from, rd); - Address addr = get_addr_with_indexing(from, size_in_bytes, forward); - __ load_sized_value(rd, addr, size_in_bytes, false); - } - - // Stores one 'size_in_bytes'-sized value to 'to' in given direction (see load_one) - void store_one(Register rd, Register to, int size_in_bytes, bool forward) { - assert_different_registers(to, rd); - Address addr = get_addr_with_indexing(to, size_in_bytes, forward); - __ store_sized_value(rd, addr, size_in_bytes); - } -#else - // load_one and store_one are the same as for AArch64 except for - // *) Support for condition execution - // *) Second value register argument for 8-byte values - void load_one(Register rd, Register from, int size_in_bytes, bool forward, AsmCondition cond = al, Register rd2 = noreg) { assert_different_registers(from, rd, rd2); if (size_in_bytes < 8) { @@ -2315,7 +1703,6 @@ } } } -#endif // AARCH64 // Copies data from 'from' to 'to' in specified direction to align 'from' by 64 bits. // (on 32-bit ARM 64-bit alignment is better for LDM). @@ -2336,36 +1723,6 @@ // Returns maximum number of bytes which may be copied. int align_src(Register from, Register to, Register count, Register tmp, int bytes_per_count, bool forward) { assert_different_registers(from, to, count, tmp); -#ifdef AARCH64 - // TODO-AARCH64: replace by simple loop? 
- Label Laligned_by_2, Laligned_by_4, Laligned_by_8; - - if (bytes_per_count == 1) { - __ tbz(from, 0, Laligned_by_2); - __ sub_32(count, count, 1); - load_one(tmp, from, 1, forward); - store_one(tmp, to, 1, forward); - } - - __ BIND(Laligned_by_2); - - if (bytes_per_count <= 2) { - __ tbz(from, 1, Laligned_by_4); - __ sub_32(count, count, 2/bytes_per_count); - load_one(tmp, from, 2, forward); - store_one(tmp, to, 2, forward); - } - - __ BIND(Laligned_by_4); - - if (bytes_per_count <= 4) { - __ tbz(from, 2, Laligned_by_8); - __ sub_32(count, count, 4/bytes_per_count); - load_one(tmp, from, 4, forward); - store_one(tmp, to, 4, forward); - } - __ BIND(Laligned_by_8); -#else // AARCH64 if (bytes_per_count < 8) { Label L_align_src; __ BIND(L_align_src); @@ -2378,7 +1735,6 @@ __ b(L_align_src, ne); // if bytes_per_count == 4, then 0 or 1 loop iterations are enough } } -#endif // AARCH64 return 7/bytes_per_count; } @@ -2398,19 +1754,6 @@ assert_different_registers(from, to, count, tmp); __ align(OptoLoopAlignment); -#ifdef AARCH64 - Label L_small_array_done, L_small_array_loop; - __ BIND(entry); - __ cbz_32(count, L_small_array_done); - - __ BIND(L_small_array_loop); - __ subs_32(count, count, 1); - load_one(tmp, from, bytes_per_count, forward); - store_one(tmp, to, bytes_per_count, forward); - __ b(L_small_array_loop, gt); - - __ BIND(L_small_array_done); -#else Label L_small_loop; __ BIND(L_small_loop); store_one(tmp, to, bytes_per_count, forward, al, tmp2); @@ -2418,7 +1761,6 @@ __ subs(count, count, 1); load_one(tmp, from, bytes_per_count, forward, ge, tmp2); __ b(L_small_loop, ge); -#endif // AARCH64 } // Aligns 'to' by reading one word from 'from' and writting its part to 'to'. @@ -2500,7 +1842,7 @@ assert (0 < to_remainder && to_remainder < wordSize, "to_remainder is invalid"); - const Register tmp = forward ? R3 : R12; // TODO-AARCH64: on cojoint_short R4 was used for tmp + const Register tmp = forward ? R3 : R12; assert_different_registers(from, to, count, Rval, tmp); int required_to_align = align_dst(to, count, Rval, tmp, to_remainder, bytes_per_count, forward); @@ -2534,7 +1876,7 @@ // shifts 'to' by the number of copied bytes // // Scratches 'from', 'count', R3 and R12. - // On AArch64 also scratches R4-R10, on 32-bit ARM saves them to use. + // R4-R10 saved for use. int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) { const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect @@ -2544,100 +1886,6 @@ // Note: if {seq} is a sequence of numbers, L{seq} means that if the execution reaches this point, // then the remainder of 'to' divided by wordSize is one of elements of {seq}. 
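// --- Illustrative sketch (not HotSpot code) -------------------------------
// The shifted copy loops selected here rebuild every aligned destination
// word from two neighbouring, misaligned source words. Sketch of that merge
// step for the forward direction, assuming 32-bit words and the stated
// invariant lsr_shift + lsl_shift == BitsPerWord (both shifts are 8, 16 or
// 24 here, never 0); the backward loop mirrors it with the shifts swapped.
// 'merge_words' is an illustrative name.
#include <cstdint>

static uint32_t merge_words(uint32_t prev, uint32_t next,
                            int lsr_shift, int lsl_shift) {
  // LSR Rp, prev, #lsr_shift ; ORR Rp, Rp, next, LSL #lsl_shift
  return (prev >> lsr_shift) | (next << lsl_shift);
}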
-#ifdef AARCH64 - // TODO-AARCH64: simplify, tune - - load_one(Rval, from, wordSize, forward); - - Label L_loop_finished; - - switch (bytes_per_count) { - case 4: - min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward); - break; - case 2: - { - Label L2, L4, L6; - - __ tbz(to, 1, L4); - __ tbz(to, 2, L2); - - __ BIND(L6); - int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L4); - int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward); - - min_copy = MAX2(MAX2(min_copy2, min_copy4), min_copy6); - break; - } - case 1: - { - Label L1, L2, L3, L4, L5, L6, L7; - Label L15, L26; - Label L246; - - __ tbz(to, 0, L246); - __ tbz(to, 1, L15); - __ tbz(to, 2, L3); - - __ BIND(L7); - int min_copy7 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 7, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L246); - __ tbnz(to, 1, L26); - - __ BIND(L4); - int min_copy4 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 4, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L15); - __ tbz(to, 2, L1); - - __ BIND(L5); - int min_copy5 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 5, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L3); - int min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L26); - __ tbz(to, 2, L2); - - __ BIND(L6); - int min_copy6 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 6, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L1); - int min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - int min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - - - min_copy = MAX2(min_copy1, min_copy2); - min_copy = MAX2(min_copy, min_copy3); - min_copy = MAX2(min_copy, min_copy4); - min_copy = MAX2(min_copy, min_copy5); - min_copy = MAX2(min_copy, min_copy6); - min_copy = MAX2(min_copy, min_copy7); - break; - } - default: - ShouldNotReachHere(); - break; - } - __ BIND(L_loop_finished); - -#else __ push(RegisterSet(R4,R10)); load_one(Rval, from, wordSize, forward); @@ -2694,7 +1942,6 @@ } __ pop(RegisterSet(R4,R10)); -#endif // AARCH64 return min_copy; } @@ -2776,7 +2023,7 @@ Label L_small_array; __ cmp_32(count, small_copy_limit); - __ b(L_small_array, le); // TODO-AARCH64: le vs lt + __ b(L_small_array, le); // Otherwise proceed with large implementation. @@ -2864,7 +2111,7 @@ // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region // count: total number of copied elements, 32-bit int // - // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers. + // Blows all volatile R0-R3, Rtemp, LR) and 'to', 'count', 'tmp' registers. 
void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, DecoratorSet decorators) { assert_different_registers(to, count, tmp); @@ -2883,12 +2130,7 @@ __ mov(R0, 0); // OK } -#ifdef AARCH64 - __ raw_pop(LR, ZR); - __ ret(); -#else __ pop(PC); -#endif // AARCH64 } @@ -2939,11 +2181,7 @@ const int callee_saved_regs = 3; // R0-R2 // LR is used later to save barrier args -#ifdef AARCH64 - __ raw_push(LR, ZR); -#else __ push(LR); -#endif // AARCH64 DecoratorSet decorators = IN_HEAP | IS_ARRAY; if (disjoint) { @@ -3021,13 +2259,8 @@ } if (!to_is_aligned) { - // !to_is_aligned <=> UseCompressedOops && AArch64 __ BIND(L_unaligned_dst); -#ifdef AARCH64 - assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops"); -#else ShouldNotReachHere(); -#endif // AARCH64 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward); assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count"); @@ -3060,10 +2293,6 @@ __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); -#ifdef AARCH64 - __ NOT_IMPLEMENTED(); - start = NULL; -#else const Register tmp = Rtemp; // bump this on entry, not on exit: @@ -3085,7 +2314,6 @@ __ jump(StubRoutines::_jshort_arraycopy, relocInfo::runtime_call_type, tmp, eq); __ jump(StubRoutines::_jbyte_arraycopy, relocInfo::runtime_call_type, tmp); -#endif return start; } @@ -3185,7 +2413,7 @@ // to: R1 // count: R2 treated as signed 32-bit int // ckoff: R3 (super_check_offset) - // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass) + // ckval: R4 (super_klass) // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit) // address generate_checkcast_copy(const char * name) { @@ -3200,7 +2428,7 @@ const Register R3_ckoff = R3; // super_check_offset const Register R4_ckval = R4; // super_klass - const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently + const int callee_saved_regs = 4; // LR saved differently Label load_element, store_element, do_epilogue, fail; @@ -3208,52 +2436,34 @@ __ zap_high_non_significant_bits(R2); -#ifdef AARCH64 - __ raw_push(LR, ZR); - __ raw_push(R19, R20); -#else int pushed = 0; __ push(LR); pushed+=1; -#endif // AARCH64 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST; BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, true, to, count, callee_saved_regs); -#ifndef AARCH64 const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11; __ push(caller_saved_regs); assert(caller_saved_regs.size() == 6, "check the count"); pushed+=6; __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack -#endif // !AARCH64 // Save arguments for barrier generation (after the pre barrier): // - must be a caller saved register and not LR // - ARM32: avoid R10 in case RThread is needed - const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11); -#ifdef AARCH64 - __ mov_w(saved_count, count); - __ cbnz_w(count, load_element); // and test count -#else + const Register saved_count = altFP_7_11; __ movs(saved_count, count); // and test count __ b(load_element,ne); -#endif // AARCH64 // nothing to copy __ mov(R0, 0); -#ifdef AARCH64 - __ raw_pop(R19, R20); - __ raw_pop(LR, ZR); - __ ret(); -#else __ pop(caller_saved_regs); __ pop(PC); -#endif // 
AARCH64 // ======== begin loop ======== // (Loop is rotated; its entry is load_element.) @@ -3290,7 +2500,7 @@ __ BIND(do_epilogue); - Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved + Register copied = R4; // saved Label L_not_copied; __ subs_32(copied, saved_count, count); // copied count (in saved reg) @@ -3306,17 +2516,10 @@ __ BIND(L_not_copied); __ cmp_32(copied, saved_count); // values preserved in saved registers -#ifdef AARCH64 - __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied) - __ raw_pop(R19, R20); - __ raw_pop(LR, ZR); - __ ret(); -#else __ mov(R0, 0, eq); // 0 if all copied __ mvn(R0, copied, ne); // else NOT(copied) __ pop(caller_saved_regs); __ pop(PC); -#endif // AARCH64 return start; } @@ -3360,7 +2563,7 @@ // R1 - src_pos (32-bit int) // R2 - dst oop // R3 - dst_pos (32-bit int) - // R4 (AArch64) / SP[0] (32-bit ARM) - element count (32-bit int) + // R4 - element count (32-bit int) // // Output: (32-bit int) // R0 == 0 - success @@ -3378,7 +2581,7 @@ // registers used as temp const Register R5_src_klass = R5; // source array klass const Register R6_dst_klass = R6; // destination array klass - const Register R_lh = AARCH64_ONLY(R7) NOT_AARCH64(altFP_7_11); // layout handler + const Register R_lh = altFP_7_11; // layout handler const Register R8_temp = R8; __ align(CodeEntryAlignment); @@ -3389,21 +2592,17 @@ __ zap_high_non_significant_bits(R3); __ zap_high_non_significant_bits(R4); -#ifndef AARCH64 int pushed = 0; const RegisterSet saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11; __ push(saved_regs); assert(saved_regs.size() == 6, "check the count"); pushed+=6; -#endif // !AARCH64 // bump this on entry, not on exit: inc_counter_np(SharedRuntime::_generic_array_copy_ctr, R5, R12); const Register length = R4; // elements count -#ifndef AARCH64 __ ldr(length, Address(SP,4*pushed)); -#endif // !AARCH64 //----------------------------------------------------------------------- @@ -3496,43 +2695,6 @@ // 'from', 'to', 'count' registers should be set in this order // since they are the same as 'src', 'src_pos', 'dst'. 
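// --- Illustrative sketch (not HotSpot code) -------------------------------
// Before branching to the element-typed stubs, the generic copy scales the
// 32-bit positions by the element size taken from the layout helper and adds
// them to the array addresses, as the retained 32-bit ARM code below does
// with add(from, src, AsmOperand(src_pos, lsl, R12_elsize)). Schematic C++
// equivalent; names are illustrative and base-offset handling is omitted.
#include <cstdint>
#include <cstddef>

static void scale_copy_arguments(uint8_t* src, uint32_t src_pos,
                                 uint8_t* dst, uint32_t dst_pos,
                                 int log2_elsize,
                                 uint8_t*& from, uint8_t*& to) {
  from = src + ((size_t)src_pos << log2_elsize);   // src_addr
  to   = dst + ((size_t)dst_pos << log2_elsize);   // dst_addr
  // count = length; then tail-call _jbyte/_jshort/_jint/_jlong_arraycopy
}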
-#ifdef AARCH64 - - BLOCK_COMMENT("choose copy loop based on element size and scale indexes"); - Label Lbyte, Lshort, Lint, Llong; - - __ cbz(R12_elsize, Lbyte); - - assert (LogBytesPerShort < LogBytesPerInt && LogBytesPerInt < LogBytesPerLong, "must be"); - __ cmp(R12_elsize, LogBytesPerInt); - __ b(Lint, eq); - __ b(Llong, gt); - - __ BIND(Lshort); - __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerShort); - __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerShort); - __ mov(count, length); - __ b(StubRoutines::_jshort_arraycopy); - - __ BIND(Lint); - __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerInt); - __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerInt); - __ mov(count, length); - __ b(StubRoutines::_jint_arraycopy); - - __ BIND(Lbyte); - __ add_ptr_scaled_int32(from, src, src_pos, 0); - __ add_ptr_scaled_int32(to, dst, dst_pos, 0); - __ mov(count, length); - __ b(StubRoutines::_jbyte_arraycopy); - - __ BIND(Llong); - __ add_ptr_scaled_int32(from, src, src_pos, LogBytesPerLong); - __ add_ptr_scaled_int32(to, dst, dst_pos, LogBytesPerLong); - __ mov(count, length); - __ b(StubRoutines::_jlong_arraycopy); - -#else // AARCH64 BLOCK_COMMENT("scale indexes to element size"); __ add(from, src, AsmOperand(src_pos, lsl, R12_elsize)); // src_addr @@ -3556,7 +2718,6 @@ __ b(StubRoutines::_jlong_arraycopy); -#endif // AARCH64 } // ObjArrayKlass @@ -3586,9 +2747,7 @@ __ BIND(L_plain_copy); __ mov(count, length); -#ifndef AARCH64 __ pop(saved_regs); // XXX optim: avoid later push in oop_arraycopy ? -#endif // !AARCH64 __ b(StubRoutines::_oop_arraycopy); } @@ -3628,28 +2787,24 @@ __ ldr_u32(sco_temp, Address(R6_dst_klass, sco_offset)); generate_type_check(R5_src_klass, sco_temp, R6_dst_klass, R8_temp, R9, - AARCH64_ONLY(R10) NOT_AARCH64(R12), + R12, L_plain_copy); // Fetch destination element klass from the ObjArrayKlass header. int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); // the checkcast_copy loop needs two extra arguments: - const Register Rdst_elem_klass = AARCH64_ONLY(R4) NOT_AARCH64(R3); + const Register Rdst_elem_klass = R3; __ ldr(Rdst_elem_klass, Address(R6_dst_klass, ek_offset)); // dest elem klass -#ifndef AARCH64 __ pop(saved_regs); // XXX optim: avoid later push in oop_arraycopy ? 
__ str(Rdst_elem_klass, Address(SP,0)); // dest elem klass argument -#endif // !AARCH64 __ ldr_u32(R3, Address(Rdst_elem_klass, sco_offset)); // sco of elem klass __ b(StubRoutines::_checkcast_arraycopy); } __ BIND(L_failed); -#ifndef AARCH64 __ pop(saved_regs); -#endif // !AARCH64 __ mvn(R0, 0); // failure, with 0 copied __ ret(); @@ -3683,11 +2838,7 @@ break; case 8: // int64_t -#ifdef AARCH64 - __ ldr(R1, Address(R0)); -#else Unimplemented(); -#endif // AARCH64 break; default: @@ -3765,467 +2916,8 @@ } -#ifndef AARCH64 #define COMPILE_CRYPTO #include "stubRoutinesCrypto_arm.cpp" -#else - -#ifdef COMPILER2 - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_encryptBlock() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); - - Label L_doLast; - - const Register from = c_rarg0; // source array address - const Register to = c_rarg1; // destination array address - const Register key = c_rarg2; // key array address - const Register keylen = R8; - - address start = __ pc(); - __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed)); - __ mov(FP, SP); - - __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input - - __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - - int quad = 1; - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad); - __ aese(V0, V1); - __ aesmc(V0, V0); - __ aese(V0, V2); - __ aesmc(V0, V0); - __ aese(V0, V3); - __ aesmc(V0, V0); - __ aese(V0, V4); - __ aesmc(V0, V0); - - __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad); - __ aese(V0, V1); - __ aesmc(V0, V0); - __ aese(V0, V2); - __ aesmc(V0, V0); - __ aese(V0, V3); - __ aesmc(V0, V0); - __ aese(V0, V4); - __ aesmc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ cmp_w(keylen, 44); - __ b(L_doLast, eq); - - __ aese(V0, V1); - __ aesmc(V0, V0); - __ aese(V0, V2); - __ aesmc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ cmp_w(keylen, 52); - __ b(L_doLast, eq); - - __ aese(V0, V1); - __ aesmc(V0, V0); - __ aese(V0, V2); - __ aesmc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ BIND(L_doLast); - - __ aese(V0, V1); - __ aesmc(V0, V0); - __ aese(V0, V2); - - __ vld1(V1, Address(key), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ eor(V0, V0, V1, 
MacroAssembler::VELEM_SIZE_8, quad); - - __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128); - - __ mov(R0, 0); - - __ mov(SP, FP); - __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed)); - __ ret(LR); - - return start; - } - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_decryptBlock() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); - Label L_doLast; - - const Register from = c_rarg0; // source array address - const Register to = c_rarg1; // destination array address - const Register key = c_rarg2; // key array address - const Register keylen = R8; - - address start = __ pc(); - __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed)); - __ mov(FP, SP); - - __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ vld1(V0, Address(from), MacroAssembler::VELEM_SIZE_8, 128); // get 16 bytes of input - - __ vld1(V5, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - - int quad = 1; - __ rev32(V5, V5, MacroAssembler::VELEM_SIZE_8, quad); - - __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad); - __ aesd(V0, V1); - __ aesimc(V0, V0); - __ aesd(V0, V2); - __ aesimc(V0, V0); - __ aesd(V0, V3); - __ aesimc(V0, V0); - __ aesd(V0, V4); - __ aesimc(V0, V0); - - __ vld1(V1, V2, V3, V4, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V3, V3, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V4, V4, MacroAssembler::VELEM_SIZE_8, quad); - __ aesd(V0, V1); - __ aesimc(V0, V0); - __ aesd(V0, V2); - __ aesimc(V0, V0); - __ aesd(V0, V3); - __ aesimc(V0, V0); - __ aesd(V0, V4); - __ aesimc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ cmp_w(keylen, 44); - __ b(L_doLast, eq); - - __ aesd(V0, V1); - __ aesimc(V0, V0); - __ aesd(V0, V2); - __ aesimc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ cmp_w(keylen, 52); - __ b(L_doLast, eq); - - __ aesd(V0, V1); - __ aesimc(V0, V0); - __ aesd(V0, V2); - __ aesimc(V0, V0); - - __ vld1(V1, V2, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V2, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ BIND(L_doLast); - - __ aesd(V0, V1); - __ aesimc(V0, V0); - __ aesd(V0, V2); - - __ eor(V0, V0, V5, MacroAssembler::VELEM_SIZE_8, quad); - - __ vst1(V0, Address(to), MacroAssembler::VELEM_SIZE_8, 128); - - __ mov(R0, 0); - - __ mov(SP, FP); - __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed)); - __ ret(LR); - - - return start; - } - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte 
array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // x0 - input length - // - address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); - - Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; - - const Register from = c_rarg0; // source array address - const Register to = c_rarg1; // destination array address - const Register key = c_rarg2; // key array address - const Register rvec = c_rarg3; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) - const Register keylen = R8; - - address start = __ pc(); - __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed)); - __ mov(FP, SP); - - __ mov(R9, len_reg); - __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ vld1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128); - - __ cmp_w(keylen, 52); - __ b(L_loadkeys_44, cc); - __ b(L_loadkeys_52, eq); - - __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - - int quad = 1; - __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad); - __ BIND(L_loadkeys_52); - __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad); - __ BIND(L_loadkeys_44); - __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad); - __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad); - __ vld1(V29, V30, V31, Address(key), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad); - - __ BIND(L_aes_loop); - __ vld1(V1, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ eor(V0, V0, V1, MacroAssembler::VELEM_SIZE_8, quad); - - __ b(L_rounds_44, cc); - __ b(L_rounds_52, eq); - - __ aese(V0, V17); - __ aesmc(V0, V0); - __ aese(V0, V18); - __ aesmc(V0, V0); - __ BIND(L_rounds_52); - __ aese(V0, V19); - __ aesmc(V0, V0); - __ aese(V0, V20); - __ aesmc(V0, V0); - __ BIND(L_rounds_44); - __ aese(V0, V21); - __ aesmc(V0, V0); - __ aese(V0, V22); - __ aesmc(V0, V0); - __ aese(V0, V23); - __ aesmc(V0, V0); - __ aese(V0, V24); - __ aesmc(V0, V0); - __ aese(V0, V25); - __ aesmc(V0, V0); - __ aese(V0, V26); - __ aesmc(V0, V0); - __ aese(V0, V27); - __ aesmc(V0, V0); - __ aese(V0, V28); - __ aesmc(V0, V0); - __ aese(V0, V29); - __ aesmc(V0, 
V0); - __ aese(V0, V30); - __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad); - - __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ sub(len_reg, len_reg, 16); - __ cbnz(len_reg, L_aes_loop); - - __ vst1(V0, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128); - - __ mov(R0, R9); - - __ mov(SP, FP); - __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed)); - __ ret(LR); - - return start; - } - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - address generate_cipherBlockChaining_decryptAESCrypt() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); - - Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; - - const Register from = c_rarg0; // source array address - const Register to = c_rarg1; // destination array address - const Register key = c_rarg2; // key array address - const Register rvec = c_rarg3; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) - const Register keylen = R8; - - address start = __ pc(); - __ stp(FP, LR, Address(SP, -2 * wordSize, pre_indexed)); - __ mov(FP, SP); - - __ mov(R9, len_reg); - __ ldr_w(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ vld1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128); - - __ vld1(V31, Address(key, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - - int quad = 1; - __ rev32(V31, V31, MacroAssembler::VELEM_SIZE_8, quad); - - __ cmp_w(keylen, 52); - __ b(L_loadkeys_44, cc); - __ b(L_loadkeys_52, eq); - - __ vld1(V17, V18, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V17, V17, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V18, V18, MacroAssembler::VELEM_SIZE_8, quad); - __ BIND(L_loadkeys_52); - __ vld1(V19, V20, Address(key, 32, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V19, V19, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V20, V20, MacroAssembler::VELEM_SIZE_8, quad); - __ BIND(L_loadkeys_44); - __ vld1(V21, V22, V23, V24, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V21, V21, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V22, V22, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V23, V23, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V24, V24, MacroAssembler::VELEM_SIZE_8, quad); - __ vld1(V25, V26, V27, V28, Address(key, 64, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V25, V25, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V26, V26, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V27, V27, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V28, V28, MacroAssembler::VELEM_SIZE_8, quad); - __ vld1(V29, V30, Address(key), MacroAssembler::VELEM_SIZE_8, 128); - __ rev32(V29, V29, MacroAssembler::VELEM_SIZE_8, quad); - __ rev32(V30, V30, MacroAssembler::VELEM_SIZE_8, quad); - - __ BIND(L_aes_loop); - __ vld1(V0, Address(from, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ orr(V1, V0, V0, MacroAssembler::VELEM_SIZE_8, quad); - - __ b(L_rounds_44, cc); - __ 
b(L_rounds_52, eq); - - __ aesd(V0, V17); - __ aesimc(V0, V0); - __ aesd(V0, V17); - __ aesimc(V0, V0); - __ BIND(L_rounds_52); - __ aesd(V0, V19); - __ aesimc(V0, V0); - __ aesd(V0, V20); - __ aesimc(V0, V0); - __ BIND(L_rounds_44); - __ aesd(V0, V21); - __ aesimc(V0, V0); - __ aesd(V0, V22); - __ aesimc(V0, V0); - __ aesd(V0, V23); - __ aesimc(V0, V0); - __ aesd(V0, V24); - __ aesimc(V0, V0); - __ aesd(V0, V25); - __ aesimc(V0, V0); - __ aesd(V0, V26); - __ aesimc(V0, V0); - __ aesd(V0, V27); - __ aesimc(V0, V0); - __ aesd(V0, V28); - __ aesimc(V0, V0); - __ aesd(V0, V29); - __ aesimc(V0, V0); - __ aesd(V0, V30); - __ eor(V0, V0, V31, MacroAssembler::VELEM_SIZE_8, quad); - __ eor(V0, V0, V2, MacroAssembler::VELEM_SIZE_8, quad); - - __ vst1(V0, Address(to, 16, post_indexed), MacroAssembler::VELEM_SIZE_8, 128); - __ orr(V2, V1, V1, MacroAssembler::VELEM_SIZE_8, quad); - - __ sub(len_reg, len_reg, 16); - __ cbnz(len_reg, L_aes_loop); - - __ vst1(V2, Address(rvec), MacroAssembler::VELEM_SIZE_8, 128); - - __ mov(R0, R9); - - __ mov(SP, FP); - __ ldp(FP, LR, Address(SP, 2 * wordSize, post_indexed)); - __ ret(LR); - - return start; - } - -#endif // COMPILER2 -#endif // AARCH64 private: @@ -4298,7 +2990,6 @@ // stub for throwing stack overflow error used both by interpreter and compiler StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); -#ifndef AARCH64 // integer division used both by interpreter and compiler StubRoutines::Arm::_idiv_irem_entry = generate_idiv_irem(); @@ -4308,7 +2999,6 @@ StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); StubRoutines::_atomic_load_long_entry = generate_atomic_load_long(); StubRoutines::_atomic_store_long_entry = generate_atomic_store_long(); -#endif // !AARCH64 } void generate_all() { @@ -4338,24 +3028,10 @@ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc); -#ifdef AARCH64 - generate_safefetch("SafeFetchN", wordSize, &StubRoutines::_safefetchN_entry, - &StubRoutines::_safefetchN_fault_pc, - &StubRoutines::_safefetchN_continuation_pc); -#ifdef COMPILER2 - if (UseAESIntrinsics) { - StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); - StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); - StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); - StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); - } -#endif -#else assert (sizeof(int) == wordSize, "32-bit architecture"); StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; -#endif // AARCH64 #ifdef COMPILE_CRYPTO // generate AES intrinsics code --- old/src/hotspot/cpu/arm/stubRoutines_arm.cpp 2018-09-17 10:30:39.424914358 -0400 +++ new/src/hotspot/cpu/arm/stubRoutines_arm.cpp 2018-09-17 10:30:38.791877384 -0400 @@ -27,13 +27,9 @@ #include "runtime/frame.inline.hpp" #include "runtime/stubRoutines.hpp" -#ifndef AARCH64 address StubRoutines::Arm::_idiv_irem_entry = NULL; -#endif address StubRoutines::Arm::_partial_subtype_check = NULL; -#ifndef AARCH64 address StubRoutines::_atomic_load_long_entry = NULL; address 
StubRoutines::_atomic_store_long_entry = NULL; -#endif --- old/src/hotspot/cpu/arm/stubRoutines_arm.hpp 2018-09-17 10:30:41.059009802 -0400 +++ new/src/hotspot/cpu/arm/stubRoutines_arm.hpp 2018-09-17 10:30:40.429973061 -0400 @@ -40,16 +40,12 @@ private: -#ifndef AARCH64 static address _idiv_irem_entry; -#endif static address _partial_subtype_check; public: -#ifndef AARCH64 static address idiv_irem_entry() { return _idiv_irem_entry; } -#endif static address partial_subtype_check() { return _partial_subtype_check; } }; @@ -57,13 +53,11 @@ return return_pc == _call_stub_return_address; } -#ifndef AARCH64 static address _atomic_load_long_entry; static address _atomic_store_long_entry; static address atomic_load_long_entry() { return _atomic_load_long_entry; } static address atomic_store_long_entry() { return _atomic_store_long_entry; } -#endif #endif // CPU_ARM_VM_STUBROUTINES_ARM_HPP --- old/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp 2018-09-17 10:30:42.630101565 -0400 +++ new/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp 2018-09-17 10:30:41.993064358 -0400 @@ -65,7 +65,7 @@ address entry = __ pc(); // callee-save register for saving LR, shared with generate_native_entry - const Register Rsaved_ret_addr = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0); + const Register Rsaved_ret_addr = Rtmp_save0; __ mov(Rsaved_ret_addr, LR); @@ -73,24 +73,6 @@ __ mov(R2, Rlocals); __ mov(R3, SP); -#ifdef AARCH64 - // expand expr. stack and extended SP to avoid cutting SP in call_VM - __ mov(Rstack_top, SP); - __ str(Rstack_top, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); - __ check_stack_top(); - - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), R1, R2, R3, false); - - __ ldp(ZR, c_rarg1, Address(SP, 2*wordSize, post_indexed)); - __ ldp(c_rarg2, c_rarg3, Address(SP, 2*wordSize, post_indexed)); - __ ldp(c_rarg4, c_rarg5, Address(SP, 2*wordSize, post_indexed)); - __ ldp(c_rarg6, c_rarg7, Address(SP, 2*wordSize, post_indexed)); - - __ ldp_d(V0, V1, Address(SP, 2*wordSize, post_indexed)); - __ ldp_d(V2, V3, Address(SP, 2*wordSize, post_indexed)); - __ ldp_d(V4, V5, Address(SP, 2*wordSize, post_indexed)); - __ ldp_d(V6, V7, Address(SP, 2*wordSize, post_indexed)); -#else // Safer to save R9 (when scratched) since callers may have been // written assuming R9 survives. This is suboptimal but @@ -110,7 +92,6 @@ // eliminate any gain imposed by avoiding 8 double word loads. 
__ fldmiad(SP, FloatRegisterSet(D0, 8), writeback); #endif // __ABI_HARD__ -#endif // AARCH64 __ ret(Rsaved_ret_addr); @@ -129,10 +110,6 @@ address TemplateInterpreterGenerator::generate_abstract_entry(void) { address entry_point = __ pc(); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); - __ restore_stack_top(); -#endif __ empty_expression_stack(); @@ -274,16 +251,11 @@ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); // Restore SP to extended SP - __ restore_stack_top(); -#else // Restore stack bottom in case i2c adjusted stack __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); // and NULL it as marker that SP is now tos until next java call __ mov(Rtemp, (int)NULL_WORD); __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 __ restore_method(); __ restore_bcp(); @@ -299,9 +271,7 @@ __ check_stack_top(); __ add(Rstack_top, Rstack_top, AsmOperand(Rtemp, lsl, Interpreter::logStackElementSize)); -#ifndef AARCH64 __ convert_retval_to_tos(state); -#endif // !AARCH64 __ check_and_handle_popframe(); __ check_and_handle_earlyret(); @@ -317,15 +287,10 @@ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); // Restore SP to extended SP - __ restore_stack_top(); -#else // The stack is not extended by deopt but we must NULL last_sp as this // entry is like a "return". __ mov(Rtemp, 0); __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 __ restore_method(); __ restore_bcp(); @@ -351,32 +316,6 @@ } address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { -#ifdef AARCH64 - address entry = __ pc(); - switch (type) { - case T_BOOLEAN: - __ tst(R0, 0xff); - __ cset(R0, ne); - break; - case T_CHAR : __ zero_extend(R0, R0, 16); break; - case T_BYTE : __ sign_extend(R0, R0, 8); break; - case T_SHORT : __ sign_extend(R0, R0, 16); break; - case T_INT : // fall through - case T_LONG : // fall through - case T_VOID : // fall through - case T_FLOAT : // fall through - case T_DOUBLE : /* nothing to do */ break; - case T_OBJECT : - // retrieve result from frame - __ ldr(R0, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize)); - // and verify it - __ verify_oop(R0); - break; - default : ShouldNotReachHere(); - } - __ ret(); - return entry; -#else // Result handlers are not used on 32-bit ARM // since the returned value is already in appropriate format. __ should_not_reach_here(); // to avoid empty code block @@ -384,7 +323,6 @@ // The result handler non-zero indicates an object is returned and this is // used in the native entry code. return type == T_OBJECT ? (address)(-1) : NULL; -#endif // AARCH64 } address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { @@ -466,11 +404,7 @@ __ add(Ricnt, Ricnt, InvocationCounter::count_increment); -#ifdef AARCH64 - __ andr(Rbcnt, Rbcnt, (unsigned int)InvocationCounter::count_mask_value); // mask out the status bits -#else __ bic(Rbcnt, Rbcnt, ~InvocationCounter::count_mask_value); // mask out the status bits -#endif // AARCH64 __ str_32(Ricnt, invocation_counter); // save invocation count __ add(Ricnt, Ricnt, Rbcnt); // add both counters @@ -522,13 +456,12 @@ // Registers on entry: // // R3 = number of additional locals - // R11 = max expression stack slots (AArch64 only) // Rthread // Rmethod // Registers used: R0, R1, R2, Rtemp. 
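// --- Illustrative sketch (not HotSpot code) -------------------------------
// Rough picture of the stack-overflow test emitted just below (schematic;
// names are illustrative and the exact reserved/guard-page terms live in the
// elided context): the method may proceed only if SP, lowered by the
// worst-case frame footprint, still lies above the thread's usable stack
// limit; otherwise SP is restored to the sender and control transfers to the
// StackOverflowError stub.
#include <cstdint>
#include <cstddef>

static bool frame_fits(uintptr_t sp, uintptr_t limit,
                       size_t overhead_bytes, size_t locals_bytes,
                       size_t max_stack_bytes) {
  uintptr_t lowest_needed = sp - (overhead_bytes + locals_bytes + max_stack_bytes);
  return lowest_needed > limit;   // continue; else throw_StackOverflowError_entry
}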
const Register Radditional_locals = R3; - const Register RmaxStack = AARCH64_ONLY(R11) NOT_AARCH64(R2); + const Register RmaxStack = R2; // monitor entry size const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; @@ -545,10 +478,8 @@ __ ldr(R0, Address(Rthread, Thread::stack_base_offset())); __ ldr(R1, Address(Rthread, Thread::stack_size_offset())); -#ifndef AARCH64 __ ldr(Rtemp, Address(Rmethod, Method::const_offset())); __ ldrh(RmaxStack, Address(Rtemp, ConstMethod::max_stack_offset())); -#endif // !AARCH64 __ sub_slow(Rtemp, SP, overhead_size + reserved_pages + guard_pages + Method::extra_stack_words()); // reserve space for additional locals @@ -562,16 +493,8 @@ __ cmp(Rtemp, R0); -#ifdef AARCH64 - Label L; - __ b(L, hi); - __ mov(SP, Rsender_sp); // restore SP - __ b(StubRoutines::throw_StackOverflowError_entry()); - __ bind(L); -#else __ mov(SP, Rsender_sp, ls); // restore SP __ b(StubRoutines::throw_StackOverflowError_entry(), ls); -#endif // AARCH64 } @@ -595,26 +518,15 @@ // get synchronization object { Label done; __ ldr_u32(Rtemp, Address(Rmethod, Method::access_flags_offset())); -#ifdef AARCH64 - __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case) - __ tbz(Rtemp, JVM_ACC_STATIC_BIT, done); -#else __ tst(Rtemp, JVM_ACC_STATIC); __ ldr(R0, Address(Rlocals, Interpreter::local_offset_in_bytes(0)), eq); // get receiver (assume this is frequent case) __ b(done, eq); -#endif // AARCH64 __ load_mirror(R0, Rmethod, Rtemp); __ bind(done); } // add space for monitor & lock -#ifdef AARCH64 - __ check_extended_sp(Rtemp); - __ sub(SP, SP, entry_size); // adjust extended SP - __ mov(Rtemp, SP); - __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); -#endif // AARCH64 __ sub(Rstack_top, Rstack_top, entry_size); __ check_stack_top_on_expansion(); @@ -627,90 +539,6 @@ __ lock_object(R1); } -#ifdef AARCH64 - -// -// Generate a fixed interpreter frame. This is identical setup for interpreted methods -// and for native methods hence the shared code. -// -// On entry: -// R10 = ConstMethod -// R11 = max expr. stack (in slots), if !native_call -// -// On exit: -// Rbcp, Rstack_top are initialized, SP is extended -// -void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - // Incoming registers - const Register RconstMethod = R10; - const Register RmaxStack = R11; - // Temporary registers - const Register RextendedSP = R0; - const Register Rcache = R1; - const Register Rmdp = ProfileInterpreter ? R2 : ZR; - - // Generates the following stack layout (stack grows up in this picture): - // - // [ expr. stack bottom ] - // [ saved Rbcp ] - // [ current Rlocals ] - // [ cache ] - // [ mdx ] - // [ mirror ] - // [ Method* ] - // [ extended SP ] - // [ expr. 
stack top ] - // [ sender_sp ] - // [ saved FP ] <--- FP - // [ saved LR ] - - // initialize fixed part of activation frame - __ stp(FP, LR, Address(SP, -2*wordSize, pre_indexed)); - __ mov(FP, SP); // establish new FP - - // setup Rbcp - if (native_call) { - __ mov(Rbcp, ZR); // bcp = 0 for native calls - } else { - __ add(Rbcp, RconstMethod, in_bytes(ConstMethod::codes_offset())); // get codebase - } - - // Rstack_top & RextendedSP - __ sub(Rstack_top, SP, 10*wordSize); - if (native_call) { - __ sub(RextendedSP, Rstack_top, align_up(wordSize, StackAlignmentInBytes)); // reserve 1 slot for exception handling - } else { - __ sub(RextendedSP, Rstack_top, AsmOperand(RmaxStack, lsl, Interpreter::logStackElementSize)); - __ align_reg(RextendedSP, RextendedSP, StackAlignmentInBytes); - } - __ mov(SP, RextendedSP); - __ check_stack_top(); - - // Load Rmdp - if (ProfileInterpreter) { - __ ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); - __ tst(Rtemp, Rtemp); - __ add(Rtemp, Rtemp, in_bytes(MethodData::data_offset())); - __ csel(Rmdp, ZR, Rtemp, eq); - } - - // Load Rcache - __ ldr(Rtemp, Address(RconstMethod, ConstMethod::constants_offset())); - __ ldr(Rcache, Address(Rtemp, ConstantPool::cache_offset_in_bytes())); - // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(Rtemp, Rmethod, Rtemp); - - // Build fixed frame - __ stp(Rstack_top, Rbcp, Address(FP, -10*wordSize)); - __ stp(Rlocals, Rcache, Address(FP, -8*wordSize)); - __ stp(Rmdp, Rtemp, Address(FP, -6*wordSize)); - __ stp(Rmethod, RextendedSP, Address(FP, -4*wordSize)); - __ stp(ZR, Rsender_sp, Address(FP, -2*wordSize)); - assert(frame::interpreter_frame_initial_sp_offset == -10, "interpreter frame broken"); - assert(frame::interpreter_frame_stack_top_offset == -2, "stack top broken"); -} - -#else // AARCH64 // // Generate a fixed interpreter frame. This is identical setup for interpreted methods @@ -772,7 +600,6 @@ __ str(SP, Address(SP, 0)); // set expression stack bottom } -#endif // AARCH64 // End of helpers @@ -801,7 +628,6 @@ // // Stack layout immediately at entry // -// [ optional padding(*)] <--- SP (AArch64) // [ parameter n ] <--- Rparams (SP on 32-bit ARM) // ... // [ parameter 1 ] @@ -815,7 +641,6 @@ // local variables follow incoming parameters immediately; i.e. // the return address is saved at the end of the locals. // -// [ reserved stack (*) ] <--- SP (AArch64) // [ expr. stack ] <--- Rstack_top (SP on 32-bit ARM) // [ monitor entry ] // ... @@ -831,10 +656,6 @@ // 32-bit ARM: // [ last_sp ] // -// AArch64: -// [ extended SP (*) ] -// [ stack top (*) ] -// // [ sender_sp ] // [ saved FP ] <--- FP // [ saved LR ] @@ -846,8 +667,6 @@ // ... 
// [ parameter 1 ] <--- Rlocals // -// (*) - AArch64 only -// address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { // Code: _aload_0, _getfield, _areturn @@ -924,29 +743,18 @@ address entry_point = __ pc(); // Register allocation - const Register Rsize_of_params = AARCH64_ONLY(R20) NOT_AARCH64(R6); - const Register Rsig_handler = AARCH64_ONLY(R21) NOT_AARCH64(Rtmp_save0 /* R4 */); - const Register Rnative_code = AARCH64_ONLY(R22) NOT_AARCH64(Rtmp_save1 /* R5 */); - const Register Rresult_handler = AARCH64_ONLY(Rsig_handler) NOT_AARCH64(R6); - -#ifdef AARCH64 - const Register RconstMethod = R10; // also used in generate_fixed_frame (should match) - const Register Rsaved_result = Rnative_code; - const FloatRegister Dsaved_result = V8; -#else + const Register Rsize_of_params = R6; + const Register Rsig_handler = Rtmp_save0; + const Register Rnative_code = Rtmp_save1; + const Register Rresult_handler = R6; + const Register Rsaved_result_lo = Rtmp_save0; // R4 const Register Rsaved_result_hi = Rtmp_save1; // R5 FloatRegister saved_result_fp; -#endif // AARCH64 -#ifdef AARCH64 - __ ldr(RconstMethod, Address(Rmethod, Method::const_offset())); - __ ldrh(Rsize_of_params, Address(RconstMethod, ConstMethod::size_of_parameters_offset())); -#else __ ldr(Rsize_of_params, Address(Rmethod, Method::const_offset())); __ ldrh(Rsize_of_params, Address(Rsize_of_params, ConstMethod::size_of_parameters_offset())); -#endif // AARCH64 // native calls don't need the stack size check since they have no expression stack // and the arguments are already on the stack and we only add a handful of words @@ -956,19 +764,9 @@ __ sub(Rlocals, Rparams, wordSize); __ add(Rlocals, Rlocals, AsmOperand(Rsize_of_params, lsl, Interpreter::logStackElementSize)); -#ifdef AARCH64 - int extra_stack_reserve = 2*wordSize; // extra space for oop_temp - if(__ can_post_interpreter_events()) { - // extra space for saved results - extra_stack_reserve += 2*wordSize; - } - // reserve extra stack space and nullify oop_temp slot - __ stp(ZR, ZR, Address(SP, -extra_stack_reserve, pre_indexed)); -#else // reserve stack space for oop_temp __ mov(R0, 0); __ push(R0); -#endif // AARCH64 generate_fixed_frame(true); // Note: R9 is now saved in the frame @@ -1064,15 +862,6 @@ // Allocate stack space for arguments -#ifdef AARCH64 - __ sub(Rtemp, SP, Rsize_of_params, ex_uxtw, LogBytesPerWord); - __ align_reg(SP, Rtemp, StackAlignmentInBytes); - - // Allocate more stack space to accomodate all arguments passed on GP and FP registers: - // 8 * wordSize for GPRs - // 8 * wordSize for FPRs - int reg_arguments = align_up(8*wordSize + 8*wordSize, StackAlignmentInBytes); -#else // C functions need aligned stack __ bic(SP, SP, StackAlignmentInBytes - 1); @@ -1092,12 +881,11 @@ // It is also used for JNIEnv & class additional parameters. int reg_arguments = 4 * wordSize; #endif // __ABI_HARD__ -#endif // AARCH64 __ sub(SP, SP, reg_arguments); - // Note: signature handler blows R4 (32-bit ARM) or R21 (AArch64) besides all scratch registers. + // Note: signature handler blows R4 besides all scratch registers. // See AbstractInterpreterGenerator::generate_slow_signature_handler(). 
__ call(Rsig_handler); #if R9_IS_SCRATCHED @@ -1133,18 +921,11 @@ } #endif -#ifdef AARCH64 - __ mov(Rtemp, _thread_in_native); - __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset())); - // STLR is used to force all preceding writes to be observed prior to thread state change - __ stlr_w(Rtemp, Rtemp2); -#else // Force all preceding writes to be observed prior to thread state change __ membar(MacroAssembler::StoreStore, Rtemp); __ mov(Rtemp, _thread_in_native); __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset())); -#endif // AARCH64 __ call(Rnative_code); #if R9_IS_SCRATCHED @@ -1166,10 +947,6 @@ __ ldr_global_s32(Rtemp, SafepointSynchronize::address_of_state()); // Protect the return value in the interleaved code: save it to callee-save registers. -#ifdef AARCH64 - __ mov(Rsaved_result, R0); - __ fmov_d(Dsaved_result, D0); -#else __ mov(Rsaved_result_lo, R0); __ mov(Rsaved_result_hi, R1); #ifdef __ABI_HARD__ @@ -1179,26 +956,17 @@ #else saved_result_fp = fnoreg; #endif // __ABI_HARD__ -#endif // AARCH64 { __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset())); __ cmp(Rtemp, SafepointSynchronize::_not_synchronized); __ cond_cmp(R3, 0, eq); -#ifdef AARCH64 - Label L; - __ b(L, eq); - __ mov(R0, Rthread); - __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none); - __ bind(L); -#else __ mov(R0, Rthread, ne); __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::none, ne); #if R9_IS_SCRATCHED __ restore_method(); #endif -#endif // AARCH64 } // Perform Native->Java thread transition @@ -1216,15 +984,9 @@ // Unbox oop result, e.g. JNIHandles::resolve result if it's an oop. { Label Lnot_oop; -#ifdef AARCH64 - __ mov_slow(Rtemp, AbstractInterpreter::result_handler(T_OBJECT)); - __ cmp(Rresult_handler, Rtemp); - __ b(Lnot_oop, ne); -#else // !AARCH64 // For ARM32, Rresult_handler is -1 for oop result, 0 otherwise. __ cbz(Rresult_handler, Lnot_oop); -#endif // !AARCH64 - Register value = AARCH64_ONLY(Rsaved_result) NOT_AARCH64(Rsaved_result_lo); + Register value = Rsaved_result_lo; __ resolve_jobject(value, // value Rtemp, // tmp1 R1_tmp); // tmp2 @@ -1233,43 +995,23 @@ __ bind(Lnot_oop); } -#ifdef AARCH64 - // Restore SP (drop native parameters area), to keep SP in sync with extended_sp in frame - __ restore_sp_after_call(Rtemp); - __ check_stack_top(); -#endif // AARCH64 // reguard stack if StackOverflow exception happened while in native. { __ ldr_u32(Rtemp, Address(Rthread, JavaThread::stack_guard_state_offset())); __ cmp_32(Rtemp, JavaThread::stack_guard_yellow_reserved_disabled); -#ifdef AARCH64 - Label L; - __ b(L, ne); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none); - __ bind(L); -#else __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::none, eq); #if R9_IS_SCRATCHED __ restore_method(); #endif -#endif // AARCH64 } // check pending exceptions { __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset())); -#ifdef AARCH64 - Label L; - __ cbz(Rtemp, L); - __ mov_pc_to(Rexception_pc); - __ b(StubRoutines::forward_exception_entry()); - __ bind(L); -#else __ cmp(Rtemp, 0); __ mov(Rexception_pc, PC, ne); __ b(StubRoutines::forward_exception_entry(), ne); -#endif // AARCH64 } if (synchronized) { @@ -1283,19 +1025,9 @@ // the exception handler code notifies the runtime of method exits // too. 
If this happens before, method entry/exit notifications are // not properly paired (was bug - gri 11/22/99). -#ifdef AARCH64 - __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result, noreg, Dsaved_result); -#else __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI, true, Rsaved_result_lo, Rsaved_result_hi, saved_result_fp); -#endif // AARCH64 // Restore the result. Oop result is restored from the stack. -#ifdef AARCH64 - __ mov(R0, Rsaved_result); - __ fmov_d(D0, Dsaved_result); - - __ blr(Rresult_handler); -#else __ cmp(Rresult_handler, 0); __ ldr(R0, Address(FP, frame::interpreter_frame_oop_temp_offset * wordSize), ne); __ mov(R0, Rsaved_result_lo, eq); @@ -1315,18 +1047,11 @@ __ bind(L); } #endif // ASSERT -#endif // AARCH64 // Restore FP/LR, sender_sp and return -#ifdef AARCH64 - __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); - __ ldp(FP, LR, Address(FP)); - __ mov(SP, Rtemp); -#else __ mov(Rtemp, FP); __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR)); __ ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize)); -#endif // AARCH64 __ ret(); @@ -1353,12 +1078,8 @@ address entry_point = __ pc(); - const Register RconstMethod = AARCH64_ONLY(R10) NOT_AARCH64(R3); + const Register RconstMethod = R3; -#ifdef AARCH64 - const Register RmaxStack = R11; - const Register RlocalsBase = R12; -#endif // AARCH64 __ ldr(RconstMethod, Address(Rmethod, Method::const_offset())); @@ -1371,48 +1092,10 @@ __ sub(R3, R3, R2); // number of additional locals -#ifdef AARCH64 - // setup RmaxStack - __ ldrh(RmaxStack, Address(RconstMethod, ConstMethod::max_stack_offset())); - // We have to add extra reserved slots to max_stack. There are 3 users of the extra slots, - // none of which are at the same time, so we just need to make sure there is enough room - // for the biggest user: - // -reserved slot for exception handler - // -reserved slots for JSR292. Method::extra_stack_entries() is the size. - // -3 reserved slots so get_method_counters() can save some registers before call_VM(). - __ add(RmaxStack, RmaxStack, MAX2(3, Method::extra_stack_entries())); -#endif // AARCH64 // see if we've got enough room on the stack for locals plus overhead. generate_stack_overflow_check(); -#ifdef AARCH64 - - // allocate space for locals - { - __ sub(RlocalsBase, Rparams, AsmOperand(R3, lsl, Interpreter::logStackElementSize)); - __ align_reg(SP, RlocalsBase, StackAlignmentInBytes); - } - - // explicitly initialize locals - { - Label zero_loop, done; - __ cbz(R3, done); - - __ tbz(R3, 0, zero_loop); - __ subs(R3, R3, 1); - __ str(ZR, Address(RlocalsBase, wordSize, post_indexed)); - __ b(done, eq); - - __ bind(zero_loop); - __ subs(R3, R3, 2); - __ stp(ZR, ZR, Address(RlocalsBase, 2*wordSize, post_indexed)); - __ b(zero_loop, ne); - - __ bind(done); - } - -#else // allocate space for locals // explicitly initialize locals @@ -1438,7 +1121,6 @@ __ push(R0, ge); __ b(loop, gt); -#endif // AARCH64 // initialize fixed part of activation frame generate_fixed_frame(false); @@ -1553,11 +1235,9 @@ Interpreter::_rethrow_exception_entry = __ pc(); // Rexception_obj: exception -#ifndef AARCH64 // Clear interpreter_frame_last_sp. 
__ mov(Rtemp, 0); __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // !AARCH64 #if R9_IS_SCRATCHED __ restore_method(); @@ -1566,9 +1246,6 @@ __ restore_dispatch(); __ restore_locals(); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); -#endif // AARCH64 // Entry point for exceptions thrown within interpreter code Interpreter::_throw_exception_entry = __ pc(); @@ -1605,9 +1282,6 @@ // Interpreter::_remove_activation_preserving_args_entry = __ pc(); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); // restore SP to extended SP -#endif // AARCH64 __ empty_expression_stack(); @@ -1634,9 +1308,6 @@ __ ldr(R0, Address(FP, frame::return_addr_offset * wordSize)); __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), R0); __ cbnz_32(R0, caller_not_deoptimized); -#ifdef AARCH64 - __ NOT_TESTED(); -#endif // Compute size of arguments for saving when returning to deoptimized caller __ restore_method(); @@ -1671,7 +1342,6 @@ /* install_monitor_exception */ false, /* notify_jvmdi */ false); -#ifndef AARCH64 // Finish with popframe handling // A previous I2C followed by a deoptimization might have moved the // outgoing arguments further up the stack. PopFrame expects the @@ -1690,17 +1360,11 @@ __ mov(R0, Rthread); __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), R0, R1, R2); __ reset_last_Java_frame(Rtemp); -#endif // !AARCH64 -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); - __ restore_stack_top(); -#else // Restore the last_sp and null it out __ ldr(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); __ mov(Rtemp, (int)NULL_WORD); __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); -#endif // AARCH64 __ restore_bcp(); __ restore_dispatch(); @@ -1777,9 +1441,6 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { address entry = __ pc(); -#ifdef AARCH64 - __ restore_sp_after_call(Rtemp); // restore SP to extended SP -#endif // AARCH64 __ restore_bcp(); __ restore_dispatch(); @@ -1800,13 +1461,11 @@ false, /* install_monitor_exception */ true); /* notify_jvmdi */ -#ifndef AARCH64 // According to interpreter calling conventions, result is returned in R0/R1, // so ftos (S0) and dtos (D0) are moved to R0/R1. // This conversion should be done after remove_activation, as it uses // push(state) & pop(state) to preserve return value. 
__ convert_tos_to_retval(state); -#endif // !AARCH64 __ ret(); return entry; } @@ -1829,7 +1488,7 @@ lep = __ pc(); __ push(ltos); __ b(L); - if (AARCH64_ONLY(true) NOT_AARCH64(VerifyOops)) { // can't share atos entry with itos on AArch64 or if VerifyOops + if (VerifyOops) { // can't share atos entry if VerifyOops aep = __ pc(); __ push(atos); __ b(L); } else { aep = __ pc(); // fall through @@ -1857,11 +1516,7 @@ // pass tosca registers as arguments __ mov(R2, R0_tos); -#ifdef AARCH64 - __ mov(R3, ZR); -#else __ mov(R3, R1_tos_hi); -#endif // AARCH64 __ mov(R1, LR); // save return address // call tracer --- old/src/hotspot/cpu/arm/templateTable_arm.cpp 2018-09-17 10:30:44.228194906 -0400 +++ new/src/hotspot/cpu/arm/templateTable_arm.cpp 2018-09-17 10:30:43.594157874 -0400 @@ -58,9 +58,7 @@ } static inline Address laddress(int n) { return iaddress(n + 1); } -#ifndef AARCH64 static inline Address haddress(int n) { return iaddress(n + 0); } -#endif // !AARCH64 static inline Address faddress(int n) { return iaddress(n); } static inline Address daddress(int n) { return laddress(n); } @@ -72,12 +70,7 @@ } Address TemplateTable::load_iaddress(Register index, Register scratch) { -#ifdef AARCH64 - get_local_base_addr(scratch, index); - return Address(scratch); -#else return Address(Rlocals, index, lsl, Interpreter::logStackElementSize, basic_offset, sub_offset); -#endif // AARCH64 } Address TemplateTable::load_aaddress(Register index, Register scratch) { @@ -113,45 +106,29 @@ } -// 32-bit ARM: // Loads double/long local into R0_tos_lo/R1_tos_hi with two // separate ldr instructions (supports nonadjacent values). // Used for longs in all modes, and for doubles in SOFTFP mode. -// -// AArch64: loads long local into R0_tos. -// void TemplateTable::load_category2_local(Register Rlocal_index, Register tmp) { const Register Rlocal_base = tmp; assert_different_registers(Rlocal_index, tmp); get_local_base_addr(Rlocal_base, Rlocal_index); -#ifdef AARCH64 - __ ldr(R0_tos, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1))); -#else __ ldr(R0_tos_lo, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1))); __ ldr(R1_tos_hi, Address(Rlocal_base, Interpreter::local_offset_in_bytes(0))); -#endif // AARCH64 } -// 32-bit ARM: // Stores R0_tos_lo/R1_tos_hi to double/long local with two // separate str instructions (supports nonadjacent values). // Used for longs in all modes, and for doubles in SOFTFP mode -// -// AArch64: stores R0_tos to long local. -// void TemplateTable::store_category2_local(Register Rlocal_index, Register tmp) { const Register Rlocal_base = tmp; assert_different_registers(Rlocal_index, tmp); get_local_base_addr(Rlocal_base, Rlocal_index); -#ifdef AARCH64 - __ str(R0_tos, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1))); -#else __ str(R0_tos_lo, Address(Rlocal_base, Interpreter::local_offset_in_bytes(1))); __ str(R1_tos_hi, Address(Rlocal_base, Interpreter::local_offset_in_bytes(0))); -#endif // AARCH64 } // Returns address of Java array element using temp register as address base. @@ -192,7 +169,7 @@ // Miscelaneous helper routines // Store an oop (or NULL) at the address described by obj. -// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +// Blows all volatile registers (R0-R3, Rtemp, LR). // Also destroys new_val and obj.base(). static void do_oop_store(InterpreterMacroAssembler* _masm, Address obj, @@ -224,7 +201,7 @@ } -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. 
+// Blows volatile registers R0-R3, Rtemp, LR. void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Register temp_reg, bool load_bc_into_bc_reg/*=true*/, int byte_no) { @@ -327,22 +304,12 @@ transition(vtos, ltos); assert((value == 0) || (value == 1), "unexpected long constant"); __ mov(R0_tos, value); -#ifndef AARCH64 __ mov(R1_tos_hi, 0); -#endif // !AARCH64 } void TemplateTable::fconst(int value) { transition(vtos, ftos); -#ifdef AARCH64 - switch(value) { - case 0: __ fmov_sw(S0_tos, ZR); break; - case 1: __ fmov_s (S0_tos, 0x70); break; - case 2: __ fmov_s (S0_tos, 0x00); break; - default: ShouldNotReachHere(); break; - } -#else const int zero = 0; // 0.0f const int one = 0x3f800000; // 1.0f const int two = 0x40000000; // 2.0f @@ -357,19 +324,11 @@ #ifndef __SOFTFP__ __ fmsr(S0_tos, R0_tos); #endif // !__SOFTFP__ -#endif // AARCH64 } void TemplateTable::dconst(int value) { transition(vtos, dtos); -#ifdef AARCH64 - switch(value) { - case 0: __ fmov_dx(D0_tos, ZR); break; - case 1: __ fmov_d (D0_tos, 0x70); break; - default: ShouldNotReachHere(); break; - } -#else const int one_lo = 0; // low part of 1.0 const int one_hi = 0x3ff00000; // high part of 1.0 @@ -390,7 +349,6 @@ } else { ShouldNotReachHere(); } -#endif // AARCH64 } @@ -429,25 +387,15 @@ // get const type __ add(Rtemp, Rtags, tags_offset); -#ifdef AARCH64 - __ add(Rtemp, Rtemp, Rindex); - __ ldarb(RtagType, Rtemp); // TODO-AARCH64 figure out if barrier is needed here, or control dependency is enough -#else __ ldrb(RtagType, Address(Rtemp, Rindex)); volatile_barrier(MacroAssembler::LoadLoad, Rtemp); -#endif // AARCH64 // unresolved class - get the resolved class __ cmp(RtagType, JVM_CONSTANT_UnresolvedClass); // unresolved class in error (resolution failed) - call into runtime // so that the same error from first resolution attempt is thrown. 
-#ifdef AARCH64 - __ mov(Rtemp, JVM_CONSTANT_UnresolvedClassInError); // this constant does not fit into 5-bit immediate constraint - __ cond_cmp(RtagType, Rtemp, ne); -#else __ cond_cmp(RtagType, JVM_CONSTANT_UnresolvedClassInError, ne); -#endif // AARCH64 // resolved class - need to call vm to get java mirror of the class __ cond_cmp(RtagType, JVM_CONSTANT_Class, ne); @@ -556,12 +504,8 @@ __ cmp(Rtemp, JVM_CONSTANT_Long); __ b(Condy, ne); -#ifdef AARCH64 - __ ldr(R0_tos, Address(Rbase, base_offset)); -#else __ ldr(R0_tos_lo, Address(Rbase, base_offset + 0 * wordSize)); __ ldr(R1_tos_hi, Address(Rbase, base_offset + 1 * wordSize)); -#endif // AARCH64 __ push(ltos); __ b(exit); @@ -587,12 +531,8 @@ // VMr2 = flags = (tos, off) using format of CPCE::_flags __ mov(off, flags); -#ifdef AARCH64 - __ andr(off, off, (unsigned)ConstantPoolCacheEntry::field_index_mask); -#else __ logical_shift_left( off, off, 32 - ConstantPoolCacheEntry::field_index_bits); __ logical_shift_right(off, off, 32 - ConstantPoolCacheEntry::field_index_bits); -#endif const Address field(obj, off); @@ -652,13 +592,9 @@ __ cond_cmp(flags, dtos, ne); __ b(notLongDouble, ne); -#ifdef AARCH64 - __ ldr(R0_tos, field); -#else __ add(rtmp, obj, wordSize); __ ldr(R0_tos_lo, Address(obj, off)); __ ldr(R1_tos_hi, Address(rtmp, off)); -#endif __ push(ltos); __ b(Done); @@ -907,12 +843,8 @@ index_check(Rarray, Rindex); -#ifdef AARCH64 - __ ldr(R0_tos, get_array_elem_addr(T_LONG, Rarray, Rindex, Rtemp)); -#else Address addr = get_array_elem_addr_same_base(T_LONG, Rarray, Rindex, Rtemp); __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, addr, noreg /* ltos */, noreg, noreg, noreg); -#endif // AARCH64 } @@ -1012,12 +944,8 @@ void TemplateTable::lload(int n) { transition(vtos, ltos); -#ifdef AARCH64 - __ ldr(R0_tos, laddress(n)); -#else __ ldr(R0_tos_lo, laddress(n)); __ ldr(R1_tos_hi, haddress(n)); -#endif // AARCH64 } @@ -1105,14 +1033,8 @@ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); __ cmp(next_bytecode, Bytecodes::_fast_fgetfield); -#ifdef AARCH64 - __ mov(Rtemp, Bytecodes::_fast_faccess_0); - __ mov(target_bytecode, Bytecodes::_fast_aload_0); - __ mov(target_bytecode, Rtemp, eq); -#else __ mov(target_bytecode, Bytecodes::_fast_faccess_0, eq); __ mov(target_bytecode, Bytecodes::_fast_aload_0, ne); -#endif // AARCH64 // rewrite __ bind(rewrite); @@ -1198,11 +1120,7 @@ const Register Rlocal_index = R2_tmp; const Register Rlocal_base = R3_tmp; -#ifdef AARCH64 - __ pop_l(R0_tos); -#else __ pop_l(R0_tos_lo, R1_tos_hi); -#endif // AARCH64 locals_index_wide(Rlocal_index); store_category2_local(Rlocal_index, R3_tmp); @@ -1252,12 +1170,8 @@ __ pop_i(Rindex); index_check(Rarray, Rindex); -#ifdef AARCH64 - __ str(R0_tos, get_array_elem_addr(T_LONG, Rarray, Rindex, Rtemp)); -#else Address addr = get_array_elem_addr_same_base(T_LONG, Rarray, Rindex, Rtemp); __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, addr, noreg /* ltos */, noreg, noreg, noreg, false); -#endif // AARCH64 } @@ -1401,12 +1315,8 @@ void TemplateTable::lstore(int n) { transition(ltos, vtos); -#ifdef AARCH64 - __ str(R0_tos, laddress(n)); -#else __ str(R0_tos_lo, laddress(n)); __ str(R1_tos_hi, haddress(n)); -#endif // AARCH64 } @@ -1563,15 +1473,9 @@ case _and : __ and_32 (R0_tos, arg1, arg2); break; case _or : __ orr_32 (R0_tos, arg1, arg2); break; case _xor : __ eor_32 (R0_tos, arg1, arg2); break; -#ifdef AARCH64 - case shl : __ lslv_w (R0_tos, arg1, arg2); break; - case shr : __ asrv_w (R0_tos, arg1, arg2); break; - case 
ushr : __ lsrv_w (R0_tos, arg1, arg2); break; -#else case shl : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, lsl, arg2)); break; case shr : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, asr, arg2)); break; case ushr : __ andr(arg2, arg2, 0x1f); __ mov (R0_tos, AsmOperand(arg1, lsr, arg2)); break; -#endif // AARCH64 default : ShouldNotReachHere(); } } @@ -1579,20 +1483,6 @@ void TemplateTable::lop2(Operation op) { transition(ltos, ltos); -#ifdef AARCH64 - const Register arg1 = R1_tmp; - const Register arg2 = R0_tos; - - __ pop_l(arg1); - switch (op) { - case add : __ add (R0_tos, arg1, arg2); break; - case sub : __ sub (R0_tos, arg1, arg2); break; - case _and : __ andr(R0_tos, arg1, arg2); break; - case _or : __ orr (R0_tos, arg1, arg2); break; - case _xor : __ eor (R0_tos, arg1, arg2); break; - default : ShouldNotReachHere(); - } -#else const Register arg1_lo = R2_tmp; const Register arg1_hi = R3_tmp; const Register arg2_lo = R0_tos_lo; @@ -1607,20 +1497,11 @@ case _xor: __ eor (R0_tos_lo, arg1_lo, arg2_lo); __ eor (R1_tos_hi, arg1_hi, arg2_hi); break; default : ShouldNotReachHere(); } -#endif // AARCH64 } void TemplateTable::idiv() { transition(itos, itos); -#ifdef AARCH64 - const Register divisor = R0_tos; - const Register dividend = R1_tmp; - - __ cbz_w(divisor, Interpreter::_throw_ArithmeticException_entry); - __ pop_i(dividend); - __ sdiv_w(R0_tos, dividend, divisor); -#else __ mov(R2, R0_tos); __ pop_i(R0); // R0 - dividend @@ -1628,41 +1509,22 @@ __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none); // R1 - result __ mov(R0_tos, R1); -#endif // AARCH64 } void TemplateTable::irem() { transition(itos, itos); -#ifdef AARCH64 - const Register divisor = R0_tos; - const Register dividend = R1_tmp; - const Register quotient = R2_tmp; - - __ cbz_w(divisor, Interpreter::_throw_ArithmeticException_entry); - __ pop_i(dividend); - __ sdiv_w(quotient, dividend, divisor); - __ msub_w(R0_tos, divisor, quotient, dividend); -#else __ mov(R2, R0_tos); __ pop_i(R0); // R0 - dividend // R2 - divisor __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::none); // R0 - remainder -#endif // AARCH64 } void TemplateTable::lmul() { transition(ltos, ltos); -#ifdef AARCH64 - const Register arg1 = R0_tos; - const Register arg2 = R1_tmp; - - __ pop_l(arg2); - __ mul(R0_tos, arg1, arg2); -#else const Register arg1_lo = R0_tos_lo; const Register arg1_hi = R1_tos_hi; const Register arg2_lo = R2_tmp; @@ -1671,20 +1533,11 @@ __ pop_l(arg2_lo, arg2_hi); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lmul), arg1_lo, arg1_hi, arg2_lo, arg2_hi); -#endif // AARCH64 } void TemplateTable::ldiv() { transition(ltos, ltos); -#ifdef AARCH64 - const Register divisor = R0_tos; - const Register dividend = R1_tmp; - - __ cbz(divisor, Interpreter::_throw_ArithmeticException_entry); - __ pop_l(dividend); - __ sdiv(R0_tos, dividend, divisor); -#else const Register x_lo = R2_tmp; const Register x_hi = R3_tmp; const Register y_lo = R0_tos_lo; @@ -1696,22 +1549,11 @@ __ orrs(Rtemp, y_lo, y_hi); __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), y_lo, y_hi, x_lo, x_hi); -#endif // AARCH64 } void TemplateTable::lrem() { transition(ltos, ltos); -#ifdef AARCH64 - const Register divisor = R0_tos; - const Register dividend = R1_tmp; - const Register quotient = R2_tmp; - - __ cbz(divisor, Interpreter::_throw_ArithmeticException_entry); - __ pop_l(dividend); - __ sdiv(quotient, dividend, divisor); - __ 
msub(R0_tos, divisor, quotient, dividend); -#else const Register x_lo = R2_tmp; const Register x_hi = R3_tmp; const Register y_lo = R0_tos_lo; @@ -1723,18 +1565,11 @@ __ orrs(Rtemp, y_lo, y_hi); __ call(Interpreter::_throw_ArithmeticException_entry, relocInfo::none, eq); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), y_lo, y_hi, x_lo, x_hi); -#endif // AARCH64 } void TemplateTable::lshl() { transition(itos, ltos); -#ifdef AARCH64 - const Register val = R1_tmp; - const Register shift_cnt = R0_tos; - __ pop_l(val); - __ lslv(R0_tos, val, shift_cnt); -#else const Register shift_cnt = R4_tmp; const Register val_lo = R2_tmp; const Register val_hi = R3_tmp; @@ -1742,18 +1577,11 @@ __ pop_l(val_lo, val_hi); __ andr(shift_cnt, R0_tos, 63); __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, lsl, shift_cnt); -#endif // AARCH64 } void TemplateTable::lshr() { transition(itos, ltos); -#ifdef AARCH64 - const Register val = R1_tmp; - const Register shift_cnt = R0_tos; - __ pop_l(val); - __ asrv(R0_tos, val, shift_cnt); -#else const Register shift_cnt = R4_tmp; const Register val_lo = R2_tmp; const Register val_hi = R3_tmp; @@ -1761,18 +1589,11 @@ __ pop_l(val_lo, val_hi); __ andr(shift_cnt, R0_tos, 63); __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, asr, shift_cnt); -#endif // AARCH64 } void TemplateTable::lushr() { transition(itos, ltos); -#ifdef AARCH64 - const Register val = R1_tmp; - const Register shift_cnt = R0_tos; - __ pop_l(val); - __ lsrv(R0_tos, val, shift_cnt); -#else const Register shift_cnt = R4_tmp; const Register val_lo = R2_tmp; const Register val_hi = R3_tmp; @@ -1780,7 +1601,6 @@ __ pop_l(val_lo, val_hi); __ andr(shift_cnt, R0_tos, 63); __ long_shift(R0_tos_lo, R1_tos_hi, val_lo, val_hi, lsr, shift_cnt); -#endif // AARCH64 } @@ -1876,12 +1696,8 @@ void TemplateTable::lneg() { transition(ltos, ltos); -#ifdef AARCH64 - __ neg(R0_tos, R0_tos); -#else __ rsbs(R0_tos_lo, R0_tos_lo, 0); __ rsc (R1_tos_hi, R1_tos_hi, 0); -#endif // AARCH64 } @@ -1991,37 +1807,25 @@ // Conversion switch (bytecode()) { case Bytecodes::_i2l: -#ifdef AARCH64 - __ sign_extend(R0_tos, R0_tos, 32); -#else __ mov(R1_tos_hi, AsmOperand(R0_tos, asr, BitsPerWord-1)); -#endif // AARCH64 break; case Bytecodes::_i2f: -#ifdef AARCH64 - __ scvtf_sw(S0_tos, R0_tos); -#else #ifdef __SOFTFP__ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2f), R0_tos); #else __ fmsr(S0_tmp, R0_tos); __ fsitos(S0_tos, S0_tmp); #endif // __SOFTFP__ -#endif // AARCH64 break; case Bytecodes::_i2d: -#ifdef AARCH64 - __ scvtf_dw(D0_tos, R0_tos); -#else #ifdef __SOFTFP__ __ call_VM_leaf(CAST_FROM_FN_PTR(address, __aeabi_i2d), R0_tos); #else __ fmsr(S0_tmp, R0_tos); __ fsitod(D0_tos, S0_tmp); #endif // __SOFTFP__ -#endif // AARCH64 break; case Bytecodes::_i2b: @@ -2041,49 +1845,33 @@ break; case Bytecodes::_l2f: -#ifdef AARCH64 - __ scvtf_sx(S0_tos, R0_tos); -#else __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2f), R0_tos_lo, R1_tos_hi); #if !defined(__SOFTFP__) && !defined(__ABI_HARD__) __ fmsr(S0_tos, R0); #endif // !__SOFTFP__ && !__ABI_HARD__ -#endif // AARCH64 break; case Bytecodes::_l2d: -#ifdef AARCH64 - __ scvtf_dx(D0_tos, R0_tos); -#else __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::l2d), R0_tos_lo, R1_tos_hi); #if !defined(__SOFTFP__) && !defined(__ABI_HARD__) __ fmdrr(D0_tos, R0, R1); #endif // !__SOFTFP__ && !__ABI_HARD__ -#endif // AARCH64 break; case Bytecodes::_f2i: -#ifdef AARCH64 - __ fcvtzs_ws(R0_tos, S0_tos); -#else #ifndef __SOFTFP__ __ ftosizs(S0_tos, S0_tos); __ fmrs(R0_tos, 
S0_tos); #else __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), R0_tos); #endif // !__SOFTFP__ -#endif // AARCH64 break; case Bytecodes::_f2l: -#ifdef AARCH64 - __ fcvtzs_xs(R0_tos, S0_tos); -#else #ifndef __SOFTFP__ __ fmrs(R0_tos, S0_tos); #endif // !__SOFTFP__ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), R0_tos); -#endif // AARCH64 break; case Bytecodes::_f2d: @@ -2095,27 +1883,19 @@ break; case Bytecodes::_d2i: -#ifdef AARCH64 - __ fcvtzs_wd(R0_tos, D0_tos); -#else #ifndef __SOFTFP__ __ ftosizd(Stemp, D0); __ fmrs(R0, Stemp); #else __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), R0_tos_lo, R1_tos_hi); #endif // !__SOFTFP__ -#endif // AARCH64 break; case Bytecodes::_d2l: -#ifdef AARCH64 - __ fcvtzs_xd(R0_tos, D0_tos); -#else #ifndef __SOFTFP__ __ fmrrd(R0_tos_lo, R1_tos_hi, D0_tos); #endif // !__SOFTFP__ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), R0_tos_lo, R1_tos_hi); -#endif // AARCH64 break; case Bytecodes::_d2f: @@ -2134,16 +1914,6 @@ void TemplateTable::lcmp() { transition(ltos, itos); -#ifdef AARCH64 - const Register arg1 = R1_tmp; - const Register arg2 = R0_tos; - - __ pop_l(arg1); - - __ cmp(arg1, arg2); - __ cset(R0_tos, gt); // 1 if '>', else 0 - __ csinv(R0_tos, R0_tos, ZR, ge); // previous value if '>=', else -1 -#else const Register arg1_lo = R2_tmp; const Register arg1_hi = R3_tmp; const Register arg2_lo = R0_tos_lo; @@ -2166,33 +1936,12 @@ __ mov (res, 1, hi); __ bind(done); __ mov (R0_tos, res); -#endif // AARCH64 } void TemplateTable::float_cmp(bool is_float, int unordered_result) { assert((unordered_result == 1) || (unordered_result == -1), "invalid unordered result"); -#ifdef AARCH64 - if (is_float) { - transition(ftos, itos); - __ pop_f(S1_tmp); - __ fcmp_s(S1_tmp, S0_tos); - } else { - transition(dtos, itos); - __ pop_d(D1_tmp); - __ fcmp_d(D1_tmp, D0_tos); - } - - if (unordered_result < 0) { - __ cset(R0_tos, gt); // 1 if '>', else 0 - __ csinv(R0_tos, R0_tos, ZR, ge); // previous value if '>=', else -1 - } else { - __ cset(R0_tos, hi); // 1 if '>' or unordered, else 0 - __ csinv(R0_tos, R0_tos, ZR, pl); // previous value if '>=' or unordered, else -1 - } - -#else #ifdef __SOFTFP__ @@ -2258,7 +2007,6 @@ } __ mov(R0_tos, 0, eq); // result == 0 if equ (Z=1) #endif // __SOFTFP__ -#endif // AARCH64 } @@ -2303,12 +2051,7 @@ __ sub(Rret_addr, Rret_addr, Rtemp); // Load the next target bytecode into R3_bytecode and advance Rbcp -#ifdef AARCH64 - __ add(Rbcp, Rbcp, Rdisp); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed)); -#endif // AARCH64 // Push return address __ push_i(Rret_addr); @@ -2320,12 +2063,7 @@ // Normal (non-jsr) branch handling // Adjust the bcp by the displacement in Rdisp and load next bytecode. 
-#ifdef AARCH64 - __ add(Rbcp, Rbcp, Rdisp); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, Rdisp, lsl, 0, pre_indexed)); -#endif // AARCH64 assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); Label backedge_counter_overflow; @@ -2340,12 +2078,8 @@ const Register Rcounters = R1_tmp; // count only if backward branch -#ifdef AARCH64 - __ tbz(Rdisp, (BitsPerWord - 1), dispatch); // TODO-AARCH64: check performance of this variant on 32-bit ARM -#else __ tst(Rdisp, Rdisp); __ b(dispatch, pl); -#endif // AARCH64 if (TieredCompilation) { Label no_mdo; @@ -2364,10 +2098,10 @@ } __ bind(no_mdo); // Increment backedge counter in MethodCounters* - // Note Rbumped_taken_count is a callee saved registers for ARM32, but caller saved for ARM64 + // Note Rbumped_taken_count is a callee saved register for ARM32 __ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/, Rdisp, R3_bytecode, - AARCH64_ONLY(Rbumped_taken_count) NOT_AARCH64(noreg)); + noreg); const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset())); __ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask, Rcnt, R4_tmp, eq, &backedge_counter_overflow); @@ -2375,17 +2109,13 @@ // Increment backedge counter in MethodCounters* __ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/, Rdisp, R3_bytecode, - AARCH64_ONLY(Rbumped_taken_count) NOT_AARCH64(noreg)); + noreg); __ ldr_u32(Rtemp, Address(Rcounters, be_offset)); // load backedge counter __ add(Rtemp, Rtemp, InvocationCounter::count_increment); // increment counter __ str_32(Rtemp, Address(Rcounters, be_offset)); // store counter __ ldr_u32(Rcnt, Address(Rcounters, inv_offset)); // load invocation counter -#ifdef AARCH64 - __ andr(Rcnt, Rcnt, (unsigned int)InvocationCounter::count_mask_value); // and the status bits -#else __ bic(Rcnt, Rcnt, ~InvocationCounter::count_mask_value); // and the status bits -#endif // AARCH64 __ add(Rcnt, Rcnt, Rtemp); // add both counters if (ProfileInterpreter) { @@ -2412,13 +2142,9 @@ // sure the overflow function is called only once every overflow_frequency. 
const int overflow_frequency = 1024; -#ifdef AARCH64 - __ tst(Rbumped_taken_count, (unsigned)(overflow_frequency-1)); -#else // was '__ andrs(...,overflow_frequency-1)', testing if lowest 10 bits are 0 assert(overflow_frequency == (1 << 10),"shift by 22 not correct for expected frequency"); __ movs(Rbumped_taken_count, AsmOperand(Rbumped_taken_count, lsl, 22)); -#endif // AARCH64 __ b(backedge_counter_overflow, eq); } @@ -2487,13 +2213,8 @@ __ ldr(R1_tmp, Address(Rtmp_save0, nmethod::osr_entry_point_offset())); __ ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); -#ifdef AARCH64 - __ ldp(FP, LR, Address(FP)); - __ mov(SP, Rtemp); -#else __ ldmia(FP, RegisterSet(FP) | RegisterSet(LR)); __ bic(SP, Rtemp, StackAlignmentInBytes - 1); // Remove frame and align stack -#endif // AARCH64 __ jump(R1_tmp); } @@ -2505,19 +2226,8 @@ transition(itos, vtos); // assume branch is more often taken than not (loops use backward branches) Label not_taken; -#ifdef AARCH64 - if (cc == equal) { - __ cbnz_w(R0_tos, not_taken); - } else if (cc == not_equal) { - __ cbz_w(R0_tos, not_taken); - } else { - __ cmp_32(R0_tos, 0); - __ b(not_taken, convNegCond(cc)); - } -#else __ cmp_32(R0_tos, 0); __ b(not_taken, convNegCond(cc)); -#endif // AARCH64 branch(false, false); __ bind(not_taken); __ profile_not_taken_branch(R0_tmp); @@ -2603,9 +2313,7 @@ transition(itos, vtos); const Register Rindex = R0_tos; -#ifndef AARCH64 const Register Rtemp2 = R1_tmp; -#endif // !AARCH64 const Register Rabcp = R2_tmp; // aligned bcp const Register Rlow = R3_tmp; const Register Rhigh = R4_tmp; @@ -2616,38 +2324,13 @@ __ align_reg(Rabcp, Rtemp, BytesPerInt); // load lo & hi -#ifdef AARCH64 - __ ldp_w(Rlow, Rhigh, Address(Rabcp, 2*BytesPerInt, post_indexed)); -#else __ ldmia(Rabcp, RegisterSet(Rlow) | RegisterSet(Rhigh), writeback); -#endif // AARCH64 __ byteswap_u32(Rlow, Rtemp, Rtemp2); __ byteswap_u32(Rhigh, Rtemp, Rtemp2); // compare index with high bound __ cmp_32(Rhigh, Rindex); -#ifdef AARCH64 - Label default_case, do_dispatch; - __ ccmp_w(Rindex, Rlow, Assembler::flags_for_condition(lt), ge); - __ b(default_case, lt); - - __ sub_w(Rindex, Rindex, Rlow); - __ ldr_s32(Roffset, Address(Rabcp, Rindex, ex_sxtw, LogBytesPerInt)); - if(ProfileInterpreter) { - __ sxtw(Rindex, Rindex); - __ profile_switch_case(Rabcp, Rindex, Rtemp2, R0_tmp); - } - __ b(do_dispatch); - - __ bind(default_case); - __ ldr_s32(Roffset, Address(Rabcp, -3 * BytesPerInt)); - if(ProfileInterpreter) { - __ profile_switch_default(R0_tmp); - } - - __ bind(do_dispatch); -#else // if Rindex <= Rhigh then calculate index in table (Rindex - Rlow) __ subs(Rindex, Rindex, Rlow, ge); @@ -2673,17 +2356,11 @@ __ ldr(Roffset, Address(Rabcp, -3 * BytesPerInt), lt); __ ldr(Roffset, Address(Rabcp, Rindex, lsl, LogBytesPerInt), ge); } -#endif // AARCH64 __ byteswap_u32(Roffset, Rtemp, Rtemp2); // load the next bytecode to R3_bytecode and advance Rbcp -#ifdef AARCH64 - __ add(Rbcp, Rbcp, Roffset, ex_sxtw); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed)); -#endif // AARCH64 __ dispatch_only(vtos); } @@ -2713,32 +2390,19 @@ __ align_reg(Rabcp, Rtemp, BytesPerInt); // load default & counter -#ifdef AARCH64 - __ ldp_w(Rdefault, Rcount, Address(Rabcp, 2*BytesPerInt, post_indexed)); -#else __ ldmia(Rabcp, RegisterSet(Rdefault) | RegisterSet(Rcount), writeback); -#endif // AARCH64 __ byteswap_u32(Rcount, R1_tmp, Rtemp); -#ifdef AARCH64 - __ cbz_w(Rcount, default_case); -#else __ cmp_32(Rcount, 0); 
__ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne); __ b(default_case, eq); -#endif // AARCH64 // table search __ bind(loop); -#ifdef AARCH64 - __ ldr_s32(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed)); -#endif // AARCH64 __ cmp_32(Rtemp, Rkey); __ b(found, eq); __ subs(Rcount, Rcount, 1); -#ifndef AARCH64 __ ldr(Rtemp, Address(Rabcp, 2*BytesPerInt, post_indexed), ne); -#endif // !AARCH64 __ b(loop, ne); // default case @@ -2773,12 +2437,7 @@ __ byteswap_u32(Roffset, R1_tmp, Rtemp); // load the next bytecode to R3_bytecode and advance Rbcp -#ifdef AARCH64 - __ add(Rbcp, Rbcp, Roffset, ex_sxtw); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, Roffset, lsl, 0, pre_indexed)); -#endif // AARCH64 __ dispatch_only(vtos); } @@ -2846,12 +2505,7 @@ // } else { // i = h; // } -#ifdef AARCH64 - __ add(temp1, array, AsmOperand(h, lsl, 1+LogBytesPerInt)); - __ ldr_s32(val, Address(temp1)); -#else __ ldr_s32(val, Address(array, h, lsl, 1+LogBytesPerInt)); -#endif // AARCH64 // Convert array[h].match to native byte-ordering before compare __ byteswap_u32(val, temp1, temp2); __ cmp_32(key, val); @@ -2867,12 +2521,7 @@ // end of binary search, result index is i (must check again!) Label default_case; // Convert array[i].match to native byte-ordering before compare -#ifdef AARCH64 - __ add(temp1, array, AsmOperand(i, lsl, 1+LogBytesPerInt)); - __ ldr_s32(val, Address(temp1)); -#else __ ldr_s32(val, Address(array, i, lsl, 1+LogBytesPerInt)); -#endif // AARCH64 __ byteswap_u32(val, temp1, temp2); __ cmp_32(key, val); __ b(default_case, ne); @@ -2882,12 +2531,7 @@ __ ldr_s32(offset, Address(temp1, 1*BytesPerInt)); __ profile_switch_case(R0, i, R1, i); __ byteswap_u32(offset, temp1, temp2); -#ifdef AARCH64 - __ add(Rbcp, Rbcp, offset, ex_sxtw); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed)); -#endif // AARCH64 __ dispatch_only(vtos); // default case @@ -2895,12 +2539,7 @@ __ profile_switch_default(R0); __ ldr_s32(offset, Address(array, -2*BytesPerInt)); __ byteswap_u32(offset, temp1, temp2); -#ifdef AARCH64 - __ add(Rbcp, Rbcp, offset, ex_sxtw); - __ ldrb(R3_bytecode, Address(Rbcp)); -#else __ ldrb(R3_bytecode, Address(Rbcp, offset, lsl, 0, pre_indexed)); -#endif // AARCH64 __ dispatch_only(vtos); } @@ -2932,13 +2571,11 @@ __ interp_verify_oop(R0_tos, state, __FILE__, __LINE__); -#ifndef AARCH64 // According to interpreter calling conventions, result is returned in R0/R1, // so ftos (S0) and dtos (D0) are moved to R0/R1. // This conversion should be done after remove_activation, as it uses // push(state) & pop(state) to preserve return value. __ convert_tos_to_retval(state); -#endif // !AARCH64 __ ret(); @@ -2972,19 +2609,14 @@ // requirement (1) but miss the volatile-store-volatile-load case. This final // case is placed after volatile-stores although it could just as well go // before volatile-loads. -// TODO-AARCH64: consider removing extra unused parameters void TemplateTable::volatile_barrier(MacroAssembler::Membar_mask_bits order_constraint, Register tmp, bool preserve_flags, Register load_tgt) { -#ifdef AARCH64 - __ membar(order_constraint); -#else __ membar(order_constraint, tmp, preserve_flags, load_tgt); -#endif } -// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR. +// Blows all volatile registers: R0-R3, Rtemp, LR. 
void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register Rindex, @@ -3046,7 +2678,7 @@ } -// Blows all volatile registers: R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR. +// Blows all volatile registers: R0-R3, Rtemp, LR. void TemplateTable::load_invoke_cp_cache_entry(int byte_no, Register method, Register itable_index, @@ -3089,7 +2721,7 @@ // The registers cache and index expected to be set before call, and should not be Rtemp. -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR, +// Blows volatile registers R0-R3, Rtemp, LR, // except cache and index registers which are preserved. void TemplateTable::jvmti_post_field_access(Register Rcache, Register Rindex, @@ -3167,46 +2799,27 @@ // There are actually two versions of implementation of getfield/getstatic: // - // 32-bit ARM: // 1) Table switch using add(PC,...) instruction (fast_version) // 2) Table switch using ldr(PC,...) instruction // - // AArch64: - // 1) Table switch using adr/add/br instructions (fast_version) - // 2) Table switch using adr/ldr/br instructions - // // First version requires fixed size of code block for each case and // can not be used in RewriteBytecodes and VerifyOops // modes. // Size of fixed size code block for fast_version - const int log_max_block_size = AARCH64_ONLY(2) NOT_AARCH64(3); + const int log_max_block_size = 3; const int max_block_size = 1 << log_max_block_size; // Decide if fast version is enabled - bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops && !VerifyInterpreterStackTop; + bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops; // On 32-bit ARM atos and itos cases can be merged only for fast version, because // atos requires additional processing in slow version. - // On AArch64 atos and itos cannot be merged. 
- bool atos_merged_with_itos = AARCH64_ONLY(false) NOT_AARCH64(fast_version); + bool atos_merged_with_itos = fast_version; assert(number_of_states == 10, "number of tos states should be equal to 9"); __ cmp(Rflags, itos); -#ifdef AARCH64 - __ b(Lint, eq); - - if(fast_version) { - __ adr(Rtemp, Lbtos); - __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize)); - __ br(Rtemp); - } else { - __ adr(Rtemp, Ltable); - __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags)); - __ br(Rtemp); - } -#else if(atos_merged_with_itos) { __ cmp(Rflags, atos, ne); } @@ -3220,13 +2833,11 @@ // jump to itos/atos case __ b(Lint); -#endif // AARCH64 // table with addresses for slow version if (fast_version) { // nothing to do } else { - AARCH64_ONLY(__ align(wordSize)); __ bind(Ltable); __ emit_address(Lbtos); __ emit_address(Lztos); @@ -3309,11 +2920,7 @@ assert(ltos == seq++, "ltos has unexpected value"); FixedSizeCodeBlock ltos_block(_masm, max_block_size, fast_version); __ bind(Lltos); -#ifdef AARCH64 - __ ldr(R0_tos, Address(Robj, Roffset)); -#else __ access_load_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg); -#endif // AARCH64 __ push(ltos); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lgetfield, R0_tmp, Rtemp); @@ -3343,11 +2950,7 @@ __ bind(Ldtos); // doubles and longs are placed on stack in the same way, so // we can use push(ltos) to transfer value without using VFP -#ifdef AARCH64 - __ ldr(R0_tos, Address(Robj, Roffset)); -#else __ access_load_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg); -#endif // AARCH64 __ push(ltos); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dgetfield, R0_tmp, Rtemp); @@ -3359,7 +2962,7 @@ { assert(atos == seq++, "atos has unexpected value"); - // atos case for AArch64 and slow version on 32-bit ARM + // atos case for slow version on 32-bit ARM if(!atos_merged_with_itos) { __ bind(Latos); do_oop_load(_masm, R0_tos, Address(Robj, Roffset)); @@ -3416,7 +3019,7 @@ // The registers cache and index expected to be set before call, and should not be R1 or Rtemp. -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR, +// Blows volatile registers R0-R3, Rtemp, LR, // except cache and index registers which are preserved. void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rindex, bool is_static) { ByteSize cp_base_offset = ConstantPoolCache::base_offset(); @@ -3448,18 +3051,11 @@ __ cmp(Rtemp, ltos); __ cond_cmp(Rtemp, dtos, ne); -#ifdef AARCH64 - __ mov(Rtemp, Interpreter::expr_offset_in_bytes(2)); - __ mov(R1, Interpreter::expr_offset_in_bytes(1)); - __ mov(R1, Rtemp, eq); - __ ldr(R1, Address(Rstack_top, R1)); -#else // two word value (ltos/dtos) __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(2)), eq); // one word value (not ltos, dtos) __ ldr(R1, Address(SP, Interpreter::expr_offset_in_bytes(1)), ne); -#endif // AARCH64 } // cache entry pointer @@ -3522,39 +3118,22 @@ // 1) Table switch using add(PC,...) instruction (fast_version) // 2) Table switch using ldr(PC,...) instruction // - // AArch64: - // 1) Table switch using adr/add/br instructions (fast_version) - // 2) Table switch using adr/ldr/br instructions - // // First version requires fixed size of code block for each case and // can not be used in RewriteBytecodes and VerifyOops // modes. // Size of fixed size code block for fast_version (in instructions) - const int log_max_block_size = AARCH64_ONLY(is_static ? 
2 : 3) NOT_AARCH64(3); + const int log_max_block_size = 3; const int max_block_size = 1 << log_max_block_size; // Decide if fast version is enabled - bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops && !ZapHighNonSignificantBits; + bool fast_version = (is_static || !RewriteBytecodes) && !VerifyOops; assert(number_of_states == 10, "number of tos states should be equal to 9"); // itos case is frequent and is moved outside table switch __ cmp(Rflags, itos); -#ifdef AARCH64 - __ b(Lint, eq); - - if (fast_version) { - __ adr(Rtemp, Lbtos); - __ add(Rtemp, Rtemp, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize)); - __ br(Rtemp); - } else { - __ adr(Rtemp, Ltable); - __ ldr(Rtemp, Address::indexed_ptr(Rtemp, Rflags)); - __ br(Rtemp); - } -#else // table switch by type if (fast_version) { __ add(PC, PC, AsmOperand(Rflags, lsl, log_max_block_size + Assembler::LogInstructionSize), ne); @@ -3564,13 +3143,11 @@ // jump to itos case __ b(Lint); -#endif // AARCH64 // table with addresses for slow version if (fast_version) { // nothing to do } else { - AARCH64_ONLY(__ align(wordSize)); __ bind(Ltable); __ emit_address(Lbtos); __ emit_address(Lztos); @@ -3657,11 +3234,7 @@ __ bind(Lltos); __ pop(ltos); if (!is_static) pop_and_check_object(Robj); -#ifdef AARCH64 - __ str(R0_tos, Address(Robj, Roffset)); -#else __ access_store_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg, false); -#endif // AARCH64 if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lputfield, R0_tmp, Rtemp, true, byte_no); } @@ -3693,11 +3266,7 @@ // we can use pop(ltos) to transfer value without using VFP __ pop(ltos); if (!is_static) pop_and_check_object(Robj); -#ifdef AARCH64 - __ str(R0_tos, Address(Robj, Roffset)); -#else __ access_store_at(T_LONG, IN_HEAP, Address(Robj, Roffset), noreg /* ltos */, noreg, noreg, noreg, false); -#endif // AARCH64 if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_dputfield, R0_tmp, Rtemp, true, byte_no); } @@ -3782,7 +3351,7 @@ Unimplemented(); } -// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR, +// Blows volatile registers R0-R3, Rtemp, LR, // but preserves tosca with the given state. 
void TemplateTable::jvmti_post_fast_field_mod(TosState state) { if (__ can_post_field_modification()) { @@ -3851,7 +3420,6 @@ Label notVolatile; __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); - // TODO-AARCH64 on AArch64, store-release instructions can be used to get rid of this explict barrier volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); __ bind(notVolatile); @@ -3878,11 +3446,6 @@ case Bytecodes::_fast_iputfield: __ access_store_at(T_INT, IN_HEAP, addr, R0_tos, noreg, noreg, noreg, false); break; -#ifdef AARCH64 - case Bytecodes::_fast_lputfield: __ str (R0_tos, addr); break; - case Bytecodes::_fast_fputfield: __ str_s(S0_tos, addr); break; - case Bytecodes::_fast_dputfield: __ str_d(D0_tos, addr); break; -#else case Bytecodes::_fast_lputfield: __ access_store_at(T_LONG, IN_HEAP, addr, noreg, noreg, noreg, noreg, false); break; @@ -3892,8 +3455,6 @@ case Bytecodes::_fast_dputfield: __ access_store_at(T_DOUBLE, IN_HEAP, addr, noreg, noreg, noreg, noreg, false); break; -#endif // AARCH64 - case Bytecodes::_fast_aputfield: do_oop_store(_masm, addr, R0_tos, Rtemp, R1_tmp, R2_tmp, false); break; @@ -3902,6 +3463,7 @@ ShouldNotReachHere(); } + if (gen_volatile_check) { Label notVolatile; Label skipMembar; @@ -3984,11 +3546,6 @@ case Bytecodes::_fast_igetfield: __ access_load_at(T_INT, IN_HEAP, addr, R0_tos, noreg, noreg, noreg); break; -#ifdef AARCH64 - case Bytecodes::_fast_lgetfield: __ ldr (R0_tos, addr); break; - case Bytecodes::_fast_fgetfield: __ ldr_s(S0_tos, addr); break; - case Bytecodes::_fast_dgetfield: __ ldr_d(D0_tos, addr); break; -#else case Bytecodes::_fast_lgetfield: __ access_load_at(T_LONG, IN_HEAP, addr, noreg, noreg, noreg, noreg); break; @@ -3998,7 +3555,6 @@ case Bytecodes::_fast_dgetfield: __ access_load_at(T_DOUBLE, IN_HEAP, addr, noreg, noreg, noreg, noreg); break; -#endif // AARCH64 case Bytecodes::_fast_agetfield: do_oop_load(_masm, R0_tos, addr); __ verify_oop(R0_tos); @@ -4012,7 +3568,6 @@ Label notVolatile; __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); - // TODO-AARCH64 on AArch64, load-acquire instructions can be used to get rid of this explict barrier volatile_barrier(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp); __ bind(notVolatile); @@ -4050,34 +3605,6 @@ __ null_check(Robj, Rtemp); __ sub(Rbcp, Rbcp, 1); -#ifdef AARCH64 - if (gen_volatile_check) { - Label notVolatile; - __ tbz(Rflags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); - - __ add(Rtemp, Robj, Roffset); - - if (state == itos) { - __ ldar_w(R0_tos, Rtemp); - } else if (state == atos) { - if (UseCompressedOops) { - __ ldar_w(R0_tos, Rtemp); - __ decode_heap_oop(R0_tos); - } else { - __ ldar(R0_tos, Rtemp); - } - __ verify_oop(R0_tos); - } else if (state == ftos) { - __ ldar_w(R0_tos, Rtemp); - __ fmov_sw(S0_tos, R0_tos); - } else { - ShouldNotReachHere(); - } - __ b(done); - - __ bind(notVolatile); - } -#endif // AARCH64 if (state == itos) { __ access_load_at(T_INT, IN_HEAP, Address(Robj, Roffset), R0_tos, noreg, noreg, noreg); @@ -4085,20 +3612,15 @@ do_oop_load(_masm, R0_tos, Address(Robj, Roffset)); __ verify_oop(R0_tos); } else if (state == ftos) { -#ifdef AARCH64 - __ ldr_s(S0_tos, Address(Robj, Roffset)); -#else #ifdef __SOFTFP__ __ ldr(R0_tos, Address(Robj, Roffset)); #else __ access_load_at(T_FLOAT, IN_HEAP, Address(Robj, Roffset), noreg /* ftos */, noreg, noreg, noreg); #endif // __SOFTFP__ -#endif // AARCH64 } 
else { ShouldNotReachHere(); } -#ifndef AARCH64 if (gen_volatile_check) { // Check for volatile load Label notVolatile; @@ -4108,7 +3630,6 @@ __ bind(notVolatile); } -#endif // !AARCH64 __ bind(done); } @@ -4378,7 +3899,6 @@ void TemplateTable::invokehandle(int byte_no) { transition(vtos, vtos); - // TODO-AARCH64 review register usage const Register Rrecv = R2_tmp; const Register Rmtype = R4_tmp; const Register R5_method = R5_tmp; // can't reuse Rmethod! @@ -4400,7 +3920,6 @@ void TemplateTable::invokedynamic(int byte_no) { transition(vtos, vtos); - // TODO-AARCH64 review register usage const Register Rcallsite = R4_tmp; const Register R5_method = R5_tmp; // can't reuse Rmethod! @@ -4456,15 +3975,10 @@ const int tags_offset = Array::base_offset_in_bytes(); __ add(Rtemp, Rtags, Rindex); -#ifdef AARCH64 - __ add(Rtemp, Rtemp, tags_offset); - __ ldarb(Rtemp, Rtemp); -#else __ ldrb(Rtemp, Address(Rtemp, tags_offset)); // use Rklass as a scratch volatile_barrier(MacroAssembler::LoadLoad, Rklass); -#endif // AARCH64 // get InstanceKlass __ cmp(Rtemp, JVM_CONSTANT_Class); @@ -4529,11 +4043,7 @@ Label retry; __ bind(retry); -#ifdef AARCH64 - __ ldxr(Robj, Rheap_top_addr); -#else __ ldr(Robj, Address(Rheap_top_addr)); -#endif // AARCH64 __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr())); __ add(Rheap_top, Robj, Rsize); @@ -4542,13 +4052,8 @@ // Update heap top atomically. // If someone beats us on the allocation, try again, otherwise continue. -#ifdef AARCH64 - __ stxr(Rtemp2, Rheap_top, Rheap_top_addr); - __ cbnz_w(Rtemp2, retry); -#else __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/); __ b(retry, ne); -#endif // AARCH64 __ incr_allocated_bytes(Rsize, Rtemp); } @@ -4577,21 +4082,6 @@ __ bind(L); #endif -#ifdef AARCH64 - { - Label loop; - // Step back by 1 word if object size is not a multiple of 2*wordSize. - assert(wordSize <= sizeof(oopDesc), "oop header should contain at least one word"); - __ andr(Rtemp2, Rsize, (uintx)wordSize); - __ sub(Rzero_cur, Rzero_cur, Rtemp2); - - // Zero by 2 words per iteration. - __ bind(loop); - __ subs(Rsize, Rsize, 2*wordSize); - __ stp(ZR, ZR, Address(Rzero_cur, 2*wordSize, post_indexed)); - __ b(loop, gt); - } -#else __ mov(Rzero0, 0); __ mov(Rzero1, 0); __ add(Rzero_end, Rzero_cur, Rsize); @@ -4608,7 +4098,6 @@ __ cmp(Rzero_cur, Rzero_end, ne); __ b(loop, ne); } -#endif // AARCH64 // initialize object header only. 
__ bind(initialize_header); @@ -4621,9 +4110,6 @@ __ str(Rtemp, Address(Robj, oopDesc::mark_offset_in_bytes())); // klass -#ifdef AARCH64 - __ store_klass_gap(Robj); -#endif // AARCH64 __ store_klass(Rklass, Robj); // blows Rklass: Rklass = noreg; @@ -4714,19 +4200,11 @@ // See if bytecode has already been quicked __ add(Rtemp, Rtags, Rindex); -#ifdef AARCH64 - // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough - __ add(Rtemp, Rtemp, Array::base_offset_in_bytes()); - __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier -#else __ ldrb(Rtemp, Address(Rtemp, Array::base_offset_in_bytes())); -#endif // AARCH64 __ cmp(Rtemp, JVM_CONSTANT_Class); -#ifndef AARCH64 volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true); -#endif // !AARCH64 __ b(quicked, eq); @@ -4794,18 +4272,10 @@ // See if bytecode has already been quicked __ add(Rtemp, Rtags, Rindex); -#ifdef AARCH64 - // TODO-AARCH64: investigate if LoadLoad barrier is needed here or control dependency is enough - __ add(Rtemp, Rtemp, Array::base_offset_in_bytes()); - __ ldarb(Rtemp, Rtemp); // acts as LoadLoad memory barrier -#else __ ldrb(Rtemp, Address(Rtemp, Array::base_offset_in_bytes())); -#endif // AARCH64 __ cmp(Rtemp, JVM_CONSTANT_Class); -#ifndef AARCH64 volatile_barrier(MacroAssembler::LoadLoad, Rtemp, true); -#endif // !AARCH64 __ b(quicked, eq); @@ -4861,11 +4331,7 @@ __ mov(R1, Rmethod); __ mov(R2, Rbcp); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), R1, R2); -#ifdef AARCH64 - __ sxtw(Rtmp_save0, R0); -#else __ mov(Rtmp_save0, R0); -#endif // AARCH64 // post the breakpoint event __ mov(R1, Rmethod); @@ -4937,16 +4403,11 @@ // points to word before bottom of monitor block __ cmp(Rcur, Rbottom); // check if there are no monitors -#ifndef AARCH64 __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the first iteration -#endif // !AARCH64 __ b(allocate_monitor, eq); // there are no monitors, skip searching __ bind(loop); -#ifdef AARCH64 - __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes())); -#endif // AARCH64 __ cmp(Rcur_obj, 0); // check if current entry is used __ mov(Rentry, Rcur, eq); // if not used then remember entry @@ -4956,10 +4417,8 @@ __ add(Rcur, Rcur, entry_size); // otherwise advance to next entry __ cmp(Rcur, Rbottom); // check if bottom reached -#ifndef AARCH64 __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the next iteration -#endif // !AARCH64 __ b(loop, ne); // if not at bottom then check this entry __ bind(exit); } @@ -4974,12 +4433,6 @@ // 1. compute new pointers -#ifdef AARCH64 - __ check_extended_sp(Rtemp); - __ sub(SP, SP, entry_size); // adjust extended SP - __ mov(Rtemp, SP); - __ str(Rtemp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); -#endif // AARCH64 __ ldr(Rentry, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // old monitor block top / expression stack bottom @@ -4997,21 +4450,14 @@ // 2. 
move expression stack contents __ cmp(R2_tmp, Rentry); // check if expression stack is empty -#ifndef AARCH64 __ ldr(Rtemp, Address(R2_tmp, entry_size), ne); // load expression stack word from old location -#endif // !AARCH64 __ b(allocated, eq); __ bind(loop); -#ifdef AARCH64 - __ ldr(Rtemp, Address(R2_tmp, entry_size)); // load expression stack word from old location -#endif // AARCH64 __ str(Rtemp, Address(R2_tmp, wordSize, post_indexed)); // store expression stack word at new location // and advance to next word __ cmp(R2_tmp, Rentry); // check if bottom reached -#ifndef AARCH64 __ ldr(Rtemp, Address(R2, entry_size), ne); // load expression stack word from old location -#endif // !AARCH64 __ b(loop, ne); // if not at bottom then copy next word } @@ -5060,24 +4506,17 @@ // points to word before bottom of monitor block __ cmp(Rcur, Rbottom); // check if bottom reached -#ifndef AARCH64 __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object for the first iteration -#endif // !AARCH64 __ b(throw_exception, eq); // throw exception if there are now monitors __ bind(loop); -#ifdef AARCH64 - __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes())); -#endif // AARCH64 // check if current entry is for same object __ cmp(Rcur_obj, Robj); __ b(found, eq); // if same object then stop searching __ add(Rcur, Rcur, entry_size); // otherwise advance to next entry __ cmp(Rcur, Rbottom); // check if bottom reached -#ifndef AARCH64 __ ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); -#endif // !AARCH64 __ b (loop, ne); // if not at bottom then check this entry } --- old/src/hotspot/cpu/arm/vm_version_arm.hpp 2018-09-17 10:30:45.921293796 -0400 +++ new/src/hotspot/cpu/arm/vm_version_arm.hpp 2018-09-17 10:30:45.279256296 -0400 @@ -41,23 +41,6 @@ static void initialize(); static bool is_initialized() { return _is_initialized; } -#ifdef AARCH64 - - public: - static bool supports_ldrex() { return true; } - static bool supports_ldrexd() { return true; } - static bool supports_movw() { return true; } - - // Override Abstract_VM_Version implementation - static bool use_biased_locking(); - - static bool has_simd() { return _has_simd; } - static bool has_vfp() { return has_simd(); } - static bool simd_math_is_compliant() { return true; } - - static bool prefer_moves_over_load_literal() { return true; } - -#else protected: enum Feature_Flag { @@ -122,7 +105,6 @@ friend class VM_Version_StubGenerator; -#endif // AARCH64 }; #endif // CPU_ARM_VM_VM_VERSION_ARM_HPP --- old/src/hotspot/cpu/arm/vm_version_ext_arm.cpp 2018-09-17 10:30:47.488385326 -0400 +++ new/src/hotspot/cpu/arm/vm_version_ext_arm.cpp 2018-09-17 10:30:46.853348235 -0400 @@ -49,11 +49,7 @@ _no_of_cores = os::processor_count(); _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; -#ifdef AARCH64 - snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "AArch64"); -#else snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "ARM%d", _arm_arch); -#endif snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string); _initialized = true; } --- old/src/hotspot/cpu/arm/vtableStubs_arm.cpp 2018-09-17 10:30:49.049476506 -0400 +++ new/src/hotspot/cpu/arm/vtableStubs_arm.cpp 2018-09-17 10:30:48.414439415 -0400 @@ -92,7 +92,7 @@ int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset; assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned"); - int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff); + int offset_mask = 
0xfff; if (method_offset & ~offset_mask) { __ add(tmp, tmp, method_offset & ~offset_mask); } @@ -109,12 +109,7 @@ #endif address ame_addr = __ pc(); -#ifdef AARCH64 - __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset())); - __ br(tmp); -#else __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); -#endif // AARCH64 masm->flush(); bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); @@ -150,9 +145,9 @@ assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0"); // R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled - const Register Rclass = AARCH64_ONLY(R9) NOT_AARCH64(R4); - const Register Rintf = AARCH64_ONLY(R10) NOT_AARCH64(R5); - const Register Rscan = AARCH64_ONLY(R11) NOT_AARCH64(R6); + const Register Rclass = R4; + const Register Rintf = R5; + const Register Rscan = R6; Label L_no_such_interface; @@ -200,12 +195,7 @@ address ame_addr = __ pc(); -#ifdef AARCH64 - __ ldr(Rtemp, Address(Rmethod, Method::from_compiled_offset())); - __ br(Rtemp); -#else __ ldr(PC, Address(Rmethod, Method::from_compiled_offset())); -#endif // AARCH64 __ bind(L_no_such_interface); // Handle IncompatibleClassChangeError in itable stubs. --- old/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp 2018-09-17 10:30:50.606567452 -0400 +++ new/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp 2018-09-17 10:30:49.974530536 -0400 @@ -44,7 +44,6 @@ * kernel source or kernel_user_helpers.txt in Linux Doc. */ -#ifndef AARCH64 template<> template inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const { @@ -61,18 +60,9 @@ (*os::atomic_store_long_func)( PrimitiveConversions::cast(store_value), reinterpret_cast(dest)); } -#endif // As per atomic.hpp all read-modify-write operations have to provide two-way -// barriers semantics. For AARCH64 we are using load-acquire-with-reservation and -// store-release-with-reservation. While load-acquire combined with store-release -// do not generally form two-way barriers, their use with reservations does - the -// ARMv8 architecture manual Section F "Barrier Litmus Tests" indicates they -// provide sequentially consistent semantics. All we need to add is an explicit -// barrier in the failure path of the cmpxchg operations (as these don't execute -// the store) - arguably this may be overly cautious as there is a very low -// likelihood that the hardware would pull loads/stores into the region guarded -// by the reservation. +// barriers semantics. // // For ARMv7 we add explicit barriers in the stubs. 
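For context, a minimal sketch (not part of the patch, assuming GCC/Clang __atomic builtins) of what the remaining "two-way barriers" contract amounts to: every read-modify-write must behave as a sequentially consistent operation, which the ARMv7 stubs achieve with explicit dmb barriers around the exclusive load/store pair.

    #include <stdint.h>

    // Sketch: a compare-and-swap with full (two-way) barrier semantics,
    // returning the old value the way Atomic::cmpxchg does. The real 32-bit
    // ARM path delegates to os::atomic_cmpxchg_func / the kernel helpers.
    static inline int32_t cmpxchg_full_barrier(int32_t exchange_value,
                                               volatile int32_t* dest,
                                               int32_t compare_value) {
      int32_t expected = compare_value;
      __atomic_compare_exchange_n(dest, &expected, exchange_value,
                                  /*weak=*/false,
                                  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
      return expected;  // holds the old value on both success and failure
    }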
@@ -90,45 +80,9 @@ atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); -#ifdef AARCH64 - D val; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %w[val], [%[dest]]\n\t" - " add %w[val], %w[val], %w[add_val]\n\t" - " stlxr %w[tmp], %w[val], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - : [val] "=&r" (val), [tmp] "=&r" (tmp) - : [add_val] "r" (add_value), [dest] "r" (dest) - : "memory"); - return val; -#else return add_using_helper(os::atomic_add_func, add_value, dest); -#endif } -#ifdef AARCH64 -template<> -template -inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, - atomic_memory_order order) const { - STATIC_ASSERT(8 == sizeof(I)); - STATIC_ASSERT(8 == sizeof(D)); - D val; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %[val], [%[dest]]\n\t" - " add %[val], %[val], %[add_val]\n\t" - " stlxr %w[tmp], %[val], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - : [val] "=&r" (val), [tmp] "=&r" (tmp) - : [add_val] "r" (add_value), [dest] "r" (dest) - : "memory"); - return val; -} -#endif template<> template @@ -136,43 +90,9 @@ T volatile* dest, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); -#ifdef AARCH64 - T old_val; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %w[old_val], [%[dest]]\n\t" - " stlxr %w[tmp], %w[new_val], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp) - : [new_val] "r" (exchange_value), [dest] "r" (dest) - : "memory"); - return old_val; -#else return xchg_using_helper(os::atomic_xchg_func, exchange_value, dest); -#endif } -#ifdef AARCH64 -template<> -template -inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, - T volatile* dest, - atomic_memory_order order) const { - STATIC_ASSERT(8 == sizeof(T)); - T old_val; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %[old_val], [%[dest]]\n\t" - " stlxr %w[tmp], %[new_val], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp) - : [new_val] "r" (exchange_value), [dest] "r" (dest) - : "memory"); - return old_val; -} -#endif // AARCH64 // The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering @@ -180,7 +100,6 @@ template<> struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; -#ifndef AARCH64 inline int32_t reorder_cmpxchg_func(int32_t exchange_value, int32_t volatile* dest, @@ -197,7 +116,6 @@ return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest); } -#endif // !AARCH64 template<> template @@ -206,27 +124,7 @@ T compare_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); -#ifdef AARCH64 - T rv; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %w[rv], [%[dest]]\n\t" - " cmp %w[rv], %w[cv]\n\t" - " b.ne 2f\n\t" - " stlxr %w[tmp], %w[ev], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - " b 3f\n\t" - "2:\n\t" - " dmb sy\n\t" - "3:\n\t" - : [rv] "=&r" (rv), [tmp] "=&r" (tmp) - : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) - : "memory"); - return rv; -#else return cmpxchg_using_helper(reorder_cmpxchg_func, exchange_value, dest, compare_value); -#endif } template<> @@ -236,27 +134,7 @@ T compare_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); -#ifdef AARCH64 - T rv; - int tmp; - __asm__ volatile( - "1:\n\t" - " ldaxr %[rv], [%[dest]]\n\t" - " cmp %[rv], %[cv]\n\t" - " b.ne 2f\n\t" - " stlxr %w[tmp], %[ev], [%[dest]]\n\t" - " cbnz %w[tmp], 1b\n\t" - " b 3f\n\t" - "2:\n\t" - " dmb sy\n\t" - "3:\n\t" - : [rv] 
"=&r" (rv), [tmp] "=&r" (tmp) - : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) - : "memory"); - return rv; -#else return cmpxchg_using_helper(reorder_cmpxchg_long_func, exchange_value, dest, compare_value); -#endif } #endif // OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP --- old/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp 2018-09-17 10:30:52.166658573 -0400 +++ new/src/hotspot/os_cpu/linux_arm/copy_linux_arm.inline.hpp 2018-09-17 10:30:51.532621540 -0400 @@ -58,37 +58,18 @@ } static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -#ifdef AARCH64 - _Copy_conjoint_jints_atomic(from, to, count * BytesPerInt); -#else assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size"); // pd_conjoint_words is word-atomic in this implementation. pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count); -#endif } static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -#ifdef AARCH64 - assert(HeapWordSize == BytesPerLong, "64-bit architecture"); - pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count); -#else _Copy_conjoint_jlongs_atomic(from, to, count * BytesPerLong); -#endif } static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -#ifdef AARCH64 - if (UseCompressedOops) { - assert(BytesPerHeapOop == BytesPerInt, "compressed oops"); - pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count); - } else { - assert(BytesPerHeapOop == BytesPerLong, "64-bit architecture"); - pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); - } -#else assert(BytesPerHeapOop == BytesPerInt, "32-bit architecture"); pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count); -#endif } static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { --- old/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp 2018-09-17 10:30:53.739750453 -0400 +++ new/src/hotspot/os_cpu/linux_arm/globals_linux_arm.hpp 2018-09-17 10:30:53.104713362 -0400 @@ -30,16 +30,10 @@ // (see globals.hpp) // define_pd_global(bool, DontYieldALot, false); -#ifdef AARCH64 -define_pd_global(intx, CompilerThreadStackSize, 1024); -define_pd_global(intx, ThreadStackSize, 1024); -define_pd_global(intx, VMThreadStackSize, 1024); -#else define_pd_global(intx, CompilerThreadStackSize, 512); // System default ThreadStackSize appears to be 512 which is too big. define_pd_global(intx, ThreadStackSize, 320); define_pd_global(intx, VMThreadStackSize, 512); -#endif // AARCH64 define_pd_global(size_t, JVMInvokeMethodSlack, 8192); --- old/src/hotspot/os_cpu/linux_arm/linux_arm_32.s 2018-09-17 10:30:55.295841341 -0400 +++ new/src/hotspot/os_cpu/linux_arm/linux_arm_32.s 2018-09-17 10:30:54.662804367 -0400 @@ -1,4 +1,4 @@ -# +# # Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # @@ -19,15 +19,15 @@ # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. -# +# + - # NOTE WELL! The _Copy functions are called directly # from server-compiler-generated code via CallLeafNoFP, # which means that they *must* either not use floating # point or use it in the same manner as does the server # compiler. 
- + .globl _Copy_conjoint_bytes .type _Copy_conjoint_bytes, %function .globl _Copy_arrayof_conjoint_bytes --- old/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp 2018-09-17 10:30:56.847931995 -0400 +++ new/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.hpp 2018-09-17 10:30:56.217895196 -0400 @@ -32,8 +32,7 @@ // Implementation of class OrderAccess. // - we define the high level barriers below and use the general -// implementation in orderAccess.hpp, with customizations -// on AARCH64 via the specialized_* template functions +// implementation in orderAccess.hpp. // Memory Ordering on ARM is weak. // @@ -61,9 +60,6 @@ if (!os::is_MP()) { return; } -#ifdef AARCH64 - __asm__ __volatile__ ("dmb sy" : : : "memory"); -#else if (VM_Version::arm_arch() >= 7) { #ifdef __thumb__ __asm__ volatile ( @@ -78,16 +74,12 @@ "mcr p15, 0, %0, c7, c10, 5" : : "r" (zero) : "memory"); } -#endif } inline static void dmb_st() { if (!os::is_MP()) { return; } -#ifdef AARCH64 - __asm__ __volatile__ ("dmb st" : : : "memory"); -#else if (VM_Version::arm_arch() >= 7) { #ifdef __thumb__ __asm__ volatile ( @@ -102,19 +94,11 @@ "mcr p15, 0, %0, c7, c10, 5" : : "r" (zero) : "memory"); } -#endif } // Load-Load/Store barrier inline static void dmb_ld() { -#ifdef AARCH64 - if (!os::is_MP()) { - return; - } - __asm__ __volatile__ ("dmb ld" : : : "memory"); -#else dmb_sy(); -#endif } @@ -126,123 +110,4 @@ inline void OrderAccess::release() { dmb_sy(); } inline void OrderAccess::fence() { dmb_sy(); } -// specializations for Aarch64 -// TODO-AARCH64: evaluate effectiveness of ldar*/stlr* implementations compared to 32-bit ARM approach - -#ifdef AARCH64 - -template<> -struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE> -{ - template - T operator()(const volatile T* p) const { - volatile T result; - __asm__ volatile( - "ldarb %w[res], [%[ptr]]" - : [res] "=&r" (result) - : [ptr] "r" (p) - : "memory"); - return result; - } -}; - -template<> -struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE> -{ - template - T operator()(const volatile T* p) const { - volatile T result; - __asm__ volatile( - "ldarh %w[res], [%[ptr]]" - : [res] "=&r" (result) - : [ptr] "r" (p) - : "memory"); - return result; - } -}; - -template<> -struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE> -{ - template - T operator()(const volatile T* p) const { - volatile T result; - __asm__ volatile( - "ldar %w[res], [%[ptr]]" - : [res] "=&r" (result) - : [ptr] "r" (p) - : "memory"); - return result; - } -}; - -template<> -struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE> -{ - template - T operator()(const volatile T* p) const { - volatile T result; - __asm__ volatile( - "ldar %[res], [%[ptr]]" - : [res] "=&r" (result) - : [ptr] "r" (p) - : "memory"); - return result; - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> -{ - template - void operator()(T v, volatile T* p) const { - __asm__ volatile( - "stlrb %w[val], [%[ptr]]" - : - : [ptr] "r" (p), [val] "r" (v) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> -{ - template - void operator()(T v, volatile T* p) const { - __asm__ volatile( - "stlrh %w[val], [%[ptr]]" - : - : [ptr] "r" (p), [val] "r" (v) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> -{ - template - void operator()(T v, volatile T* p) const { - __asm__ volatile( - "stlr %w[val], [%[ptr]]" - : - : [ptr] "r" (p), [val] "r" (v) - : "memory"); - } -}; - -template<> -struct OrderAccess::PlatformOrderedStore<8, 
RELEASE_X_FENCE> -{ - template - void operator()(T v, volatile T* p) const { - __asm__ volatile( - "stlr %[val], [%[ptr]]" - : - : [ptr] "r" (p), [val] "r" (v) - : "memory"); - } -}; - -#endif // AARCH64 - #endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_HPP --- old/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp 2018-09-17 10:30:58.408023116 -0400 +++ new/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp 2018-09-17 10:30:57.775986200 -0400 @@ -78,7 +78,7 @@ // Don't #define SPELL_REG_FP for thumb because it is not safe to use, so this makes sure we never fetch it. #ifndef __thumb__ -#define SPELL_REG_FP AARCH64_ONLY("x29") NOT_AARCH64("fp") +#define SPELL_REG_FP "fp" #endif address os::current_stack_pointer() { @@ -95,15 +95,6 @@ // Nothing to do } -#ifdef AARCH64 - -#define arm_pc pc -#define arm_sp sp -#define arm_fp regs[29] -#define arm_r0 regs[0] -#define ARM_REGS_IN_CONTEXT 31 - -#else #if NGREG == 16 // These definitions are based on the observation that until @@ -119,7 +110,6 @@ #define ARM_REGS_IN_CONTEXT 16 -#endif // AARCH64 address os::Linux::ucontext_get_pc(const ucontext_t* uc) { return (address)uc->uc_mcontext.arm_pc; @@ -260,13 +250,11 @@ #endif } -#ifndef AARCH64 extern "C" address check_vfp_fault_instr; extern "C" address check_vfp3_32_fault_instr; address check_vfp_fault_instr = NULL; address check_vfp3_32_fault_instr = NULL; -#endif // !AARCH64 extern "C" address check_simd_fault_instr; address check_simd_fault_instr = NULL; @@ -286,8 +274,8 @@ if (sig == SIGILL && ((info->si_addr == (caddr_t)check_simd_fault_instr) - NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp_fault_instr) - NOT_AARCH64(|| info->si_addr == (caddr_t)check_vfp3_32_fault_instr))) { + || info->si_addr == (caddr_t)check_vfp_fault_instr + || info->si_addr == (caddr_t)check_vfp3_32_fault_instr)) { // skip faulty instruction + instruction that sets return value to // success and set return value to failure. 
os::Linux::ucontext_set_pc(uc, (address)info->si_addr + 8); @@ -512,9 +500,6 @@ } void os::setup_fpu() { -#ifdef AARCH64 - __asm__ volatile ("msr fpcr, xzr"); -#else #if !defined(__SOFTFP__) && defined(__VFP_FP__) // Turn on IEEE-754 compliant VFP mode __asm__ volatile ( @@ -523,7 +508,6 @@ : /* no output */ : /* no input */ : "r0" ); #endif -#endif // AARCH64 } bool os::is_allocatable(size_t bytes) { @@ -559,14 +543,8 @@ st->print_cr(" %-3s = " INTPTR_FORMAT, as_Register(r)->name(), reg_area[r]); } #define U64_FORMAT "0x%016llx" -#ifdef AARCH64 - st->print_cr(" %-3s = " U64_FORMAT, "sp", uc->uc_mcontext.sp); - st->print_cr(" %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc); - st->print_cr(" %-3s = " U64_FORMAT, "pstate", uc->uc_mcontext.pstate); -#else // now print flag register st->print_cr(" %-4s = 0x%08lx", "cpsr",uc->uc_mcontext.arm_cpsr); -#endif st->cr(); intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); @@ -595,16 +573,10 @@ print_location(st, reg_area[r]); st->cr(); } -#ifdef AARCH64 - st->print_cr(" %-3s = " U64_FORMAT, "pc", uc->uc_mcontext.pc); - print_location(st, uc->uc_mcontext.pc); - st->cr(); -#endif st->cr(); } -#ifndef AARCH64 typedef int64_t cmpxchg_long_func_t(int64_t, int64_t, volatile int64_t*); @@ -714,7 +686,6 @@ return old_value; } -#endif // !AARCH64 #ifndef PRODUCT void os::verify_stack_alignment() { --- old/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp 2018-09-17 10:30:59.980114938 -0400 +++ new/src/hotspot/os_cpu/linux_arm/os_linux_arm.hpp 2018-09-17 10:30:59.335077263 -0400 @@ -28,11 +28,7 @@ #ifndef __thumb__ enum { // Offset to add to frame::_fp when dealing with non-thumb C frames -#ifdef AARCH64 - C_frame_offset = 0, -#else C_frame_offset = -1, -#endif }; #endif @@ -44,7 +40,6 @@ // Note: Currently only used in 64 bit Windows implementations static bool register_code_area(char *low, char *high) { return true; } -#ifndef AARCH64 static int64_t (*atomic_cmpxchg_long_func)(int64_t compare_value, int64_t exchange_value, volatile int64_t *dest); @@ -74,6 +69,5 @@ static int32_t atomic_cmpxchg_bootstrap(int32_t compare_value, int32_t exchange_value, volatile int32_t *dest); -#endif // !AARCH64 #endif // OS_CPU_LINUX_ARM_VM_OS_LINUX_ARM_HPP --- old/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp 2018-09-17 10:31:01.625211024 -0400 +++ new/src/hotspot/os_cpu/linux_arm/prefetch_linux_arm.inline.hpp 2018-09-17 10:31:00.972172881 -0400 @@ -28,21 +28,13 @@ #include "runtime/prefetch.hpp" inline void Prefetch::read (void *loc, intx interval) { -#ifdef AARCH64 - __asm__ volatile ("prfm PLDL1KEEP, [%0]" : : "r" (loc)); -#else #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_5TE__) __asm__ volatile ("pld [%0]" : : "r" (loc)); #endif -#endif // AARCH64 } inline void Prefetch::write(void *loc, intx interval) { -#ifdef AARCH64 - __asm__ volatile ("prfm PSTL1KEEP, [%0]" : : "r" (loc)); -#else // Not available on 32-bit ARM (prior to ARMv7 with MP extensions) -#endif // AARCH64 } #endif // OS_CPU_LINUX_ARM_VM_PREFETCH_LINUX_ARM_INLINE_HPP --- old/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp 2018-09-17 10:31:03.187302262 -0400 +++ new/src/hotspot/os_cpu/linux_arm/thread_linux_arm.cpp 2018-09-17 10:31:02.552265171 -0400 @@ -32,17 +32,12 @@ frame JavaThread::pd_last_frame() { assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -#ifdef AARCH64 - assert (_anchor.last_Java_pc() != NULL, "pc should be stored"); - return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -#else if 
(_anchor.last_Java_pc() != NULL) { return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); } else { // This will pick up pc from sp return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); } -#endif // AARCH64 } void JavaThread::cache_global_variables() { @@ -84,7 +79,7 @@ // If we have a last_Java_frame, then we should use it even if // isInJava == true. It should be more reliable than ucontext info. - if (jt->has_last_Java_frame() AARCH64_ONLY(&& jt->last_Java_pc() != NULL)) { + if (jt->has_last_Java_frame()) { *fr_addr = jt->pd_last_frame(); return true; } --- old/src/hotspot/share/utilities/macros.hpp 2018-09-17 10:31:04.761394201 -0400 +++ new/src/hotspot/share/utilities/macros.hpp 2018-09-17 10:31:04.124356993 -0400 @@ -541,10 +541,9 @@ #define NOT_E500V2(code) code #endif -// Note: There are three ARM ports. They set the following in the makefiles: -// 1. Closed 32-bit port: -DARM -DARM32 -DTARGET_ARCH_arm -// 2. Closed 64-bit port: -DARM -DAARCH64 -D_LP64 -DTARGET_ARCH_arm -// 3. Open 64-bit port: -DAARCH64 -D_LP64 -DTARGET_ARCH_aaarch64 +// Note: There are two ARM ports. They set the following in the makefiles: +// 1. 32-bit port: -DARM -DARM32 -DTARGET_ARCH_arm +// 2. 64-bit port: -DAARCH64 -D_LP64 -DTARGET_ARCH_aaarch64 #ifdef ARM #define ARM_ONLY(code) code #define NOT_ARM(code) --- old/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2018-09-17 10:31:06.366487951 -0400 +++ new/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2018-09-17 10:31:05.724450451 -0400 @@ -72,17 +72,6 @@ public static String getCPU() throws UnsupportedPlatformException { String cpu = System.getProperty("os.arch"); - // Let any additional CPU mangling fire first - try { - Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); - AltPlatformInfo api = (AltPlatformInfo) pic.newInstance(); - if (api.knownCPU(cpu)) { - return api.getCPU(cpu); - } - } catch (Exception e) { - // Ignored - } - // Check that CPU is supported if (!knownCPU(cpu)) { throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported"); --- old/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2018-09-17 10:31:07.939579831 -0400 +++ new/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2018-09-17 10:31:07.283541514 -0400 @@ -232,22 +232,6 @@ } } - private static boolean isAlwaysSupportedPlatform() { - // Note: To date Aarch64 is the only platform that we don't statically - // know if it supports the reserved stack area. This is because the - // open Aarch64 port supports it and the Oracle arm64 port does not. - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86())) || - Platform.isOSX() || - Platform.isSolaris(); - } - - private static boolean isNeverSupportedPlatform() { - return !isAlwaysSupportedPlatform() && !Platform.isAArch64(); - } - private static boolean isSupportedPlatform; private static void initIsSupportedPlatform() throws Exception { @@ -272,19 +256,11 @@ // Do a sanity check. Some platforms we know are always supported. Make sure // we didn't determine that one of those platforms is not supported. 
- if (!isSupportedPlatform && isAlwaysSupportedPlatform()) { + if (!isSupportedPlatform) { String msg = "This platform should be supported: " + Platform.getOsArch(); System.err.println("FAILED: " + msg); throw new RuntimeException(msg); } - - // And some platforms we know are never supported. Make sure - // we didn't determine that one of those platforms is supported. - if (isSupportedPlatform && isNeverSupportedPlatform()) { - String msg = "This platform should not be supported: " + Platform.getOsArch(); - System.err.println("FAILED: " + msg); - throw new RuntimeException(msg); - } } public static void main(String[] args) throws Exception { --- old/src/hotspot/cpu/arm/arm_64.ad 2018-09-17 10:31:09.765686490 -0400 +++ /dev/null 2018-04-28 00:25:57.886812021 -0400 @@ -1,998 +0,0 @@ -// -// Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// - -// ARM Architecture Description File - -//----------REGISTER DEFINITION BLOCK------------------------------------------ -// This information is used by the matcher and the register allocator to -// describe individual registers and classes of registers within the target -// archtecture. -register %{ -//----------Architecture Description Register Definitions---------------------- -// General Registers -// "reg_def" name ( register save type, C convention save type, -// ideal register type, encoding, vm name ); -// Register Save Types: -// -// NS = No-Save: The register allocator assumes that these registers -// can be used without saving upon entry to the method, & -// that they do not need to be saved at call sites. -// -// SOC = Save-On-Call: The register allocator assumes that these registers -// can be used without saving upon entry to the method, -// but that they must be saved at call sites. -// -// SOE = Save-On-Entry: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, but they do not need to be saved at call -// sites. -// -// AS = Always-Save: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, & that they must be saved at call sites. -// -// Ideal Register Type is used to determine how to save & restore a -// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -// FIXME: above comment seems wrong. Spill done through MachSpillCopyNode -// -// The encoding number is the actual bit-pattern placed into the opcodes. 
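As a reading aid for the reg_def entries that follow: the first field is the allocator's save type and the second the C-convention save type, so an entry such as

    reg_def R_R19 (SOC, SOE, Op_RegI, 19, R19->as_VMReg());

declares R19 as Save-On-Call for the register allocator (usable on entry without saving, but not preserved across calls) and Save-On-Entry under the native C convention (callee-saved), consistent with the AAPCS64 callee-saved range x19-x28.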
- - -// ---------------------------- -// Integer/Long Registers -// ---------------------------- - -// TODO: would be nice to keep track of high-word state: -// zeroRegI --> RegL -// signedRegI --> RegL -// junkRegI --> RegL -// how to tell C2 to treak RegI as RegL, or RegL as RegI? -reg_def R_R0 (SOC, SOC, Op_RegI, 0, R0->as_VMReg()); -reg_def R_R0x (SOC, SOC, Op_RegI, 255, R0->as_VMReg()->next()); -reg_def R_R1 (SOC, SOC, Op_RegI, 1, R1->as_VMReg()); -reg_def R_R1x (SOC, SOC, Op_RegI, 255, R1->as_VMReg()->next()); -reg_def R_R2 (SOC, SOC, Op_RegI, 2, R2->as_VMReg()); -reg_def R_R2x (SOC, SOC, Op_RegI, 255, R2->as_VMReg()->next()); -reg_def R_R3 (SOC, SOC, Op_RegI, 3, R3->as_VMReg()); -reg_def R_R3x (SOC, SOC, Op_RegI, 255, R3->as_VMReg()->next()); -reg_def R_R4 (SOC, SOC, Op_RegI, 4, R4->as_VMReg()); -reg_def R_R4x (SOC, SOC, Op_RegI, 255, R4->as_VMReg()->next()); -reg_def R_R5 (SOC, SOC, Op_RegI, 5, R5->as_VMReg()); -reg_def R_R5x (SOC, SOC, Op_RegI, 255, R5->as_VMReg()->next()); -reg_def R_R6 (SOC, SOC, Op_RegI, 6, R6->as_VMReg()); -reg_def R_R6x (SOC, SOC, Op_RegI, 255, R6->as_VMReg()->next()); -reg_def R_R7 (SOC, SOC, Op_RegI, 7, R7->as_VMReg()); -reg_def R_R7x (SOC, SOC, Op_RegI, 255, R7->as_VMReg()->next()); - -reg_def R_R8 (SOC, SOC, Op_RegI, 8, R8->as_VMReg()); -reg_def R_R8x (SOC, SOC, Op_RegI, 255, R8->as_VMReg()->next()); -reg_def R_R9 (SOC, SOC, Op_RegI, 9, R9->as_VMReg()); -reg_def R_R9x (SOC, SOC, Op_RegI, 255, R9->as_VMReg()->next()); -reg_def R_R10 (SOC, SOC, Op_RegI, 10, R10->as_VMReg()); -reg_def R_R10x(SOC, SOC, Op_RegI, 255, R10->as_VMReg()->next()); -reg_def R_R11 (SOC, SOC, Op_RegI, 11, R11->as_VMReg()); -reg_def R_R11x(SOC, SOC, Op_RegI, 255, R11->as_VMReg()->next()); -reg_def R_R12 (SOC, SOC, Op_RegI, 12, R12->as_VMReg()); -reg_def R_R12x(SOC, SOC, Op_RegI, 255, R12->as_VMReg()->next()); -reg_def R_R13 (SOC, SOC, Op_RegI, 13, R13->as_VMReg()); -reg_def R_R13x(SOC, SOC, Op_RegI, 255, R13->as_VMReg()->next()); -reg_def R_R14 (SOC, SOC, Op_RegI, 14, R14->as_VMReg()); -reg_def R_R14x(SOC, SOC, Op_RegI, 255, R14->as_VMReg()->next()); -reg_def R_R15 (SOC, SOC, Op_RegI, 15, R15->as_VMReg()); -reg_def R_R15x(SOC, SOC, Op_RegI, 255, R15->as_VMReg()->next()); - -reg_def R_R16 (SOC, SOC, Op_RegI, 16, R16->as_VMReg()); // IP0 -reg_def R_R16x(SOC, SOC, Op_RegI, 255, R16->as_VMReg()->next()); -reg_def R_R17 (SOC, SOC, Op_RegI, 17, R17->as_VMReg()); // IP1 -reg_def R_R17x(SOC, SOC, Op_RegI, 255, R17->as_VMReg()->next()); -reg_def R_R18 (SOC, SOC, Op_RegI, 18, R18->as_VMReg()); // Platform Register -reg_def R_R18x(SOC, SOC, Op_RegI, 255, R18->as_VMReg()->next()); - -reg_def R_R19 (SOC, SOE, Op_RegI, 19, R19->as_VMReg()); -reg_def R_R19x(SOC, SOE, Op_RegI, 255, R19->as_VMReg()->next()); -reg_def R_R20 (SOC, SOE, Op_RegI, 20, R20->as_VMReg()); -reg_def R_R20x(SOC, SOE, Op_RegI, 255, R20->as_VMReg()->next()); -reg_def R_R21 (SOC, SOE, Op_RegI, 21, R21->as_VMReg()); -reg_def R_R21x(SOC, SOE, Op_RegI, 255, R21->as_VMReg()->next()); -reg_def R_R22 (SOC, SOE, Op_RegI, 22, R22->as_VMReg()); -reg_def R_R22x(SOC, SOE, Op_RegI, 255, R22->as_VMReg()->next()); -reg_def R_R23 (SOC, SOE, Op_RegI, 23, R23->as_VMReg()); -reg_def R_R23x(SOC, SOE, Op_RegI, 255, R23->as_VMReg()->next()); -reg_def R_R24 (SOC, SOE, Op_RegI, 24, R24->as_VMReg()); -reg_def R_R24x(SOC, SOE, Op_RegI, 255, R24->as_VMReg()->next()); -reg_def R_R25 (SOC, SOE, Op_RegI, 25, R25->as_VMReg()); -reg_def R_R25x(SOC, SOE, Op_RegI, 255, R25->as_VMReg()->next()); -reg_def R_R26 (SOC, SOE, Op_RegI, 26, R26->as_VMReg()); -reg_def 
R_R26x(SOC, SOE, Op_RegI, 255, R26->as_VMReg()->next()); -reg_def R_R27 (SOC, SOE, Op_RegI, 27, R27->as_VMReg()); // Rheap_base -reg_def R_R27x(SOC, SOE, Op_RegI, 255, R27->as_VMReg()->next()); // Rheap_base -reg_def R_R28 ( NS, SOE, Op_RegI, 28, R28->as_VMReg()); // TLS -reg_def R_R28x( NS, SOE, Op_RegI, 255, R28->as_VMReg()->next()); // TLS - -reg_def R_R29 ( NS, SOE, Op_RegI, 29, R29->as_VMReg()); // FP -reg_def R_R29x( NS, SOE, Op_RegI, 255, R29->as_VMReg()->next()); // FP -reg_def R_R30 (SOC, SOC, Op_RegI, 30, R30->as_VMReg()); // LR -reg_def R_R30x(SOC, SOC, Op_RegI, 255, R30->as_VMReg()->next()); // LR - -reg_def R_ZR ( NS, NS, Op_RegI, 31, ZR->as_VMReg()); // ZR -reg_def R_ZRx( NS, NS, Op_RegI, 255, ZR->as_VMReg()->next()); // ZR - -// FIXME -//reg_def R_SP ( NS, NS, Op_RegP, 32, SP->as_VMReg()); -reg_def R_SP ( NS, NS, Op_RegI, 32, SP->as_VMReg()); -//reg_def R_SPx( NS, NS, Op_RegP, 255, SP->as_VMReg()->next()); -reg_def R_SPx( NS, NS, Op_RegI, 255, SP->as_VMReg()->next()); - -// ---------------------------- -// Float/Double/Vector Registers -// ---------------------------- - -reg_def R_V0(SOC, SOC, Op_RegF, 0, V0->as_VMReg()); -reg_def R_V1(SOC, SOC, Op_RegF, 1, V1->as_VMReg()); -reg_def R_V2(SOC, SOC, Op_RegF, 2, V2->as_VMReg()); -reg_def R_V3(SOC, SOC, Op_RegF, 3, V3->as_VMReg()); -reg_def R_V4(SOC, SOC, Op_RegF, 4, V4->as_VMReg()); -reg_def R_V5(SOC, SOC, Op_RegF, 5, V5->as_VMReg()); -reg_def R_V6(SOC, SOC, Op_RegF, 6, V6->as_VMReg()); -reg_def R_V7(SOC, SOC, Op_RegF, 7, V7->as_VMReg()); -reg_def R_V8(SOC, SOC, Op_RegF, 8, V8->as_VMReg()); -reg_def R_V9(SOC, SOC, Op_RegF, 9, V9->as_VMReg()); -reg_def R_V10(SOC, SOC, Op_RegF, 10, V10->as_VMReg()); -reg_def R_V11(SOC, SOC, Op_RegF, 11, V11->as_VMReg()); -reg_def R_V12(SOC, SOC, Op_RegF, 12, V12->as_VMReg()); -reg_def R_V13(SOC, SOC, Op_RegF, 13, V13->as_VMReg()); -reg_def R_V14(SOC, SOC, Op_RegF, 14, V14->as_VMReg()); -reg_def R_V15(SOC, SOC, Op_RegF, 15, V15->as_VMReg()); -reg_def R_V16(SOC, SOC, Op_RegF, 16, V16->as_VMReg()); -reg_def R_V17(SOC, SOC, Op_RegF, 17, V17->as_VMReg()); -reg_def R_V18(SOC, SOC, Op_RegF, 18, V18->as_VMReg()); -reg_def R_V19(SOC, SOC, Op_RegF, 19, V19->as_VMReg()); -reg_def R_V20(SOC, SOC, Op_RegF, 20, V20->as_VMReg()); -reg_def R_V21(SOC, SOC, Op_RegF, 21, V21->as_VMReg()); -reg_def R_V22(SOC, SOC, Op_RegF, 22, V22->as_VMReg()); -reg_def R_V23(SOC, SOC, Op_RegF, 23, V23->as_VMReg()); -reg_def R_V24(SOC, SOC, Op_RegF, 24, V24->as_VMReg()); -reg_def R_V25(SOC, SOC, Op_RegF, 25, V25->as_VMReg()); -reg_def R_V26(SOC, SOC, Op_RegF, 26, V26->as_VMReg()); -reg_def R_V27(SOC, SOC, Op_RegF, 27, V27->as_VMReg()); -reg_def R_V28(SOC, SOC, Op_RegF, 28, V28->as_VMReg()); -reg_def R_V29(SOC, SOC, Op_RegF, 29, V29->as_VMReg()); -reg_def R_V30(SOC, SOC, Op_RegF, 30, V30->as_VMReg()); -reg_def R_V31(SOC, SOC, Op_RegF, 31, V31->as_VMReg()); - -reg_def R_V0b(SOC, SOC, Op_RegF, 255, V0->as_VMReg()->next(1)); -reg_def R_V1b(SOC, SOC, Op_RegF, 255, V1->as_VMReg()->next(1)); -reg_def R_V2b(SOC, SOC, Op_RegF, 255, V2->as_VMReg()->next(1)); -reg_def R_V3b(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(1)); -reg_def R_V4b(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(1)); -reg_def R_V5b(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(1)); -reg_def R_V6b(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(1)); -reg_def R_V7b(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(1)); -reg_def R_V8b(SOC, SOC, Op_RegF, 255, V8->as_VMReg()->next(1)); -reg_def R_V9b(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(1)); -reg_def R_V10b(SOC, SOC, Op_RegF, 10, 
V10->as_VMReg()->next(1)); -reg_def R_V11b(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(1)); -reg_def R_V12b(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(1)); -reg_def R_V13b(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(1)); -reg_def R_V14b(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(1)); -reg_def R_V15b(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(1)); -reg_def R_V16b(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(1)); -reg_def R_V17b(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(1)); -reg_def R_V18b(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(1)); -reg_def R_V19b(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(1)); -reg_def R_V20b(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(1)); -reg_def R_V21b(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(1)); -reg_def R_V22b(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(1)); -reg_def R_V23b(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(1)); -reg_def R_V24b(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(1)); -reg_def R_V25b(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(1)); -reg_def R_V26b(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(1)); -reg_def R_V27b(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(1)); -reg_def R_V28b(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(1)); -reg_def R_V29b(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(1)); -reg_def R_V30b(SOC, SOC, Op_RegD, 30, V30->as_VMReg()->next(1)); -reg_def R_V31b(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(1)); - -reg_def R_V0c(SOC, SOC, Op_RegF, 0, V0->as_VMReg()->next(2)); -reg_def R_V1c(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(2)); -reg_def R_V2c(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(2)); -reg_def R_V3c(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(2)); -reg_def R_V4c(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(2)); -reg_def R_V5c(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(2)); -reg_def R_V6c(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(2)); -reg_def R_V7c(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(2)); -reg_def R_V8c(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(2)); -reg_def R_V9c(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(2)); -reg_def R_V10c(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(2)); -reg_def R_V11c(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(2)); -reg_def R_V12c(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(2)); -reg_def R_V13c(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(2)); -reg_def R_V14c(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(2)); -reg_def R_V15c(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(2)); -reg_def R_V16c(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(2)); -reg_def R_V17c(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(2)); -reg_def R_V18c(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(2)); -reg_def R_V19c(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(2)); -reg_def R_V20c(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(2)); -reg_def R_V21c(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(2)); -reg_def R_V22c(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(2)); -reg_def R_V23c(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(2)); -reg_def R_V24c(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(2)); -reg_def R_V25c(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(2)); -reg_def R_V26c(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(2)); -reg_def R_V27c(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(2)); -reg_def R_V28c(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(2)); -reg_def R_V29c(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(2)); -reg_def R_V30c(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(2)); -reg_def R_V31c(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(2)); - -reg_def R_V0d(SOC, SOC, Op_RegF, 0, 
V0->as_VMReg()->next(3)); -reg_def R_V1d(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(3)); -reg_def R_V2d(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(3)); -reg_def R_V3d(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(3)); -reg_def R_V4d(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(3)); -reg_def R_V5d(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(3)); -reg_def R_V6d(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(3)); -reg_def R_V7d(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(3)); -reg_def R_V8d(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(3)); -reg_def R_V9d(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(3)); -reg_def R_V10d(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(3)); -reg_def R_V11d(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(3)); -reg_def R_V12d(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(3)); -reg_def R_V13d(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(3)); -reg_def R_V14d(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(3)); -reg_def R_V15d(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(3)); -reg_def R_V16d(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(3)); -reg_def R_V17d(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(3)); -reg_def R_V18d(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(3)); -reg_def R_V19d(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(3)); -reg_def R_V20d(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(3)); -reg_def R_V21d(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(3)); -reg_def R_V22d(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(3)); -reg_def R_V23d(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(3)); -reg_def R_V24d(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(3)); -reg_def R_V25d(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(3)); -reg_def R_V26d(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(3)); -reg_def R_V27d(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(3)); -reg_def R_V28d(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(3)); -reg_def R_V29d(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(3)); -reg_def R_V30d(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(3)); -reg_def R_V31d(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(3)); - -// ---------------------------- -// Special Registers -// Condition Codes Flag Registers -reg_def APSR (SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad()); -reg_def FPSCR(SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad()); - -// ---------------------------- -// Specify the enum values for the registers. These enums are only used by the -// OptoReg "class". We can convert these enum values at will to VMReg when needed -// for visibility to the rest of the vm. The order of this enum influences the -// register allocator so having the freedom to set this order and not be stuck -// with the order that is natural for the rest of the vm is worth it. - -// Quad vector must be aligned here, so list them first. 
-alloc_class fprs( - R_V8, R_V8b, R_V8c, R_V8d, R_V9, R_V9b, R_V9c, R_V9d, - R_V10, R_V10b, R_V10c, R_V10d, R_V11, R_V11b, R_V11c, R_V11d, - R_V12, R_V12b, R_V12c, R_V12d, R_V13, R_V13b, R_V13c, R_V13d, - R_V14, R_V14b, R_V14c, R_V14d, R_V15, R_V15b, R_V15c, R_V15d, - R_V16, R_V16b, R_V16c, R_V16d, R_V17, R_V17b, R_V17c, R_V17d, - R_V18, R_V18b, R_V18c, R_V18d, R_V19, R_V19b, R_V19c, R_V19d, - R_V20, R_V20b, R_V20c, R_V20d, R_V21, R_V21b, R_V21c, R_V21d, - R_V22, R_V22b, R_V22c, R_V22d, R_V23, R_V23b, R_V23c, R_V23d, - R_V24, R_V24b, R_V24c, R_V24d, R_V25, R_V25b, R_V25c, R_V25d, - R_V26, R_V26b, R_V26c, R_V26d, R_V27, R_V27b, R_V27c, R_V27d, - R_V28, R_V28b, R_V28c, R_V28d, R_V29, R_V29b, R_V29c, R_V29d, - R_V30, R_V30b, R_V30c, R_V30d, R_V31, R_V31b, R_V31c, R_V31d, - R_V0, R_V0b, R_V0c, R_V0d, R_V1, R_V1b, R_V1c, R_V1d, - R_V2, R_V2b, R_V2c, R_V2d, R_V3, R_V3b, R_V3c, R_V3d, - R_V4, R_V4b, R_V4c, R_V4d, R_V5, R_V5b, R_V5c, R_V5d, - R_V6, R_V6b, R_V6c, R_V6d, R_V7, R_V7b, R_V7c, R_V7d -); - -// Need double-register alignment here. -// We are already quad-register aligned because of vectors above. -alloc_class gprs( - R_R0, R_R0x, R_R1, R_R1x, R_R2, R_R2x, R_R3, R_R3x, - R_R4, R_R4x, R_R5, R_R5x, R_R6, R_R6x, R_R7, R_R7x, - R_R8, R_R8x, R_R9, R_R9x, R_R10, R_R10x, R_R11, R_R11x, - R_R12, R_R12x, R_R13, R_R13x, R_R14, R_R14x, R_R15, R_R15x, - R_R16, R_R16x, R_R17, R_R17x, R_R18, R_R18x, R_R19, R_R19x, - R_R20, R_R20x, R_R21, R_R21x, R_R22, R_R22x, R_R23, R_R23x, - R_R24, R_R24x, R_R25, R_R25x, R_R26, R_R26x, R_R27, R_R27x, - R_R28, R_R28x, R_R29, R_R29x, R_R30, R_R30x -); -// Continuing with double-reigister alignment... -alloc_class chunk2(APSR, FPSCR); -alloc_class chunk3(R_SP, R_SPx); -alloc_class chunk4(R_ZR, R_ZRx); - -//----------Architecture Description Register Classes-------------------------- -// Several register classes are automatically defined based upon information in -// this architecture description. -// 1) reg_class inline_cache_reg ( as defined in frame section ) -// 2) reg_class interpreter_method_oop_reg ( as defined in frame section ) -// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -// - -// ---------------------------- -// Integer Register Classes -// ---------------------------- -reg_class int_reg_all(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, - R_R8, R_R9, R_R10, R_R11, R_R12, R_R13, R_R14, R_R15, - R_R16, R_R17, R_R18, R_R19, R_R20, R_R21, R_R22, R_R23, - R_R24, R_R25, R_R26, R_R27, R_R28, R_R29, R_R30 -); - -// Exclusions from i_reg: -// SP (R31) -// Rthread/R28: reserved by HotSpot to the TLS register (invariant within Java) -reg_class int_reg %{ - return _INT_REG_mask; -%} -reg_class ptr_reg %{ - return _PTR_REG_mask; -%} -reg_class vectorx_reg %{ - return _VECTORX_REG_mask; -%} - -reg_class R0_regI(R_R0); -reg_class R1_regI(R_R1); -reg_class R2_regI(R_R2); -reg_class R3_regI(R_R3); -//reg_class R12_regI(R_R12); - -// ---------------------------- -// Pointer Register Classes -// ---------------------------- - -// Special class for storeP instructions, which can store SP or RPC to TLS. -// It is also used for memory addressing, allowing direct TLS addressing. 
- -reg_class sp_ptr_reg %{ - return _SP_PTR_REG_mask; -%} - -reg_class store_reg %{ - return _STR_REG_mask; -%} - -reg_class store_ptr_reg %{ - return _STR_PTR_REG_mask; -%} - -reg_class spillP_reg %{ - return _SPILLP_REG_mask; -%} - -// Other special pointer regs -reg_class R0_regP(R_R0, R_R0x); -reg_class R1_regP(R_R1, R_R1x); -reg_class R2_regP(R_R2, R_R2x); -reg_class Rexception_regP(R_R19, R_R19x); -reg_class Ricklass_regP(R_R8, R_R8x); -reg_class Rmethod_regP(R_R27, R_R27x); - -reg_class Rthread_regP(R_R28, R_R28x); -reg_class IP_regP(R_R16, R_R16x); -#define RtempRegP IPRegP -reg_class LR_regP(R_R30, R_R30x); - -reg_class SP_regP(R_SP, R_SPx); -reg_class FP_regP(R_R29, R_R29x); - -reg_class ZR_regP(R_ZR, R_ZRx); -reg_class ZR_regI(R_ZR); - -// ---------------------------- -// Long Register Classes -// ---------------------------- -reg_class long_reg %{ return _PTR_REG_mask; %} -// for ldrexd, strexd: first reg of pair must be even -reg_class long_reg_align %{ return LONG_REG_mask(); %} - -reg_class R0_regL(R_R0,R_R0x); // arg 1 or return value - -// ---------------------------- -// Special Class for Condition Code Flags Register -reg_class int_flags(APSR); -reg_class float_flags(FPSCR); - - -// ---------------------------- -// Float Point Register Classes -// ---------------------------- -reg_class sflt_reg_0( - R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7, - R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15, - R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23, - R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, R_V31); - -reg_class sflt_reg %{ - return _SFLT_REG_mask; -%} - -reg_class dflt_low_reg %{ - return _DFLT_REG_mask; -%} - -reg_class actual_dflt_reg %{ - return _DFLT_REG_mask; -%} - -reg_class vectorx_reg_0( - R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7, - R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15, - R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23, - R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, /*R_V31,*/ - R_V0b, R_V1b, R_V2b, R_V3b, R_V4b, R_V5b, R_V6b, R_V7b, - R_V8b, R_V9b, R_V10b, R_V11b, R_V12b, R_V13b, R_V14b, R_V15b, - R_V16b, R_V17b, R_V18b, R_V19b, R_V20b, R_V21b, R_V22b, R_V23b, - R_V24b, R_V25b, R_V26b, R_V27b, R_V28b, R_V29b, R_V30b, /*R_V31b,*/ - R_V0c, R_V1c, R_V2c, R_V3c, R_V4c, R_V5c, R_V6c, R_V7c, - R_V8c, R_V9c, R_V10c, R_V11c, R_V12c, R_V13c, R_V14c, R_V15c, - R_V16c, R_V17c, R_V18c, R_V19c, R_V20c, R_V21c, R_V22c, R_V23c, - R_V24c, R_V25c, R_V26c, R_V27c, R_V28c, R_V29c, R_V30c, /*R_V31c,*/ - R_V0d, R_V1d, R_V2d, R_V3d, R_V4d, R_V5d, R_V6d, R_V7d, - R_V8d, R_V9d, R_V10d, R_V11d, R_V12d, R_V13d, R_V14d, R_V15d, - R_V16d, R_V17d, R_V18d, R_V19d, R_V20d, R_V21d, R_V22d, R_V23d, - R_V24d, R_V25d, R_V26d, R_V27d, R_V28d, R_V29d, R_V30d, /*R_V31d*/); - -reg_class Rmemcopy_reg %{ - return _RMEMCOPY_REG_mask; -%} - -%} - -source_hpp %{ - -const MachRegisterNumbers R_mem_copy_lo_num = R_V31_num; -const MachRegisterNumbers R_mem_copy_hi_num = R_V31b_num; -const FloatRegister Rmemcopy = V31; - -const MachRegisterNumbers R_hf_ret_lo_num = R_V0_num; -const MachRegisterNumbers R_hf_ret_hi_num = R_V0b_num; -const FloatRegister Rhfret = V0; - -extern OptoReg::Name R_Ricklass_num; -extern OptoReg::Name R_Rmethod_num; -extern OptoReg::Name R_tls_num; -extern OptoReg::Name R_Rheap_base_num; - -extern RegMask _INT_REG_mask; -extern RegMask _PTR_REG_mask; -extern RegMask _SFLT_REG_mask; -extern RegMask _DFLT_REG_mask; -extern RegMask _VECTORX_REG_mask; -extern RegMask _RMEMCOPY_REG_mask; -extern RegMask _SP_PTR_REG_mask; -extern RegMask 
_SPILLP_REG_mask; -extern RegMask _STR_REG_mask; -extern RegMask _STR_PTR_REG_mask; - -#define LDR_DOUBLE "LDR_D" -#define LDR_FLOAT "LDR_S" -#define STR_DOUBLE "STR_D" -#define STR_FLOAT "STR_S" -#define STR_64 "STR" -#define LDR_64 "LDR" -#define STR_32 "STR_W" -#define LDR_32 "LDR_W" -#define MOV_DOUBLE "FMOV_D" -#define MOV_FLOAT "FMOV_S" -#define FMSR "FMOV_SW" -#define FMRS "FMOV_WS" -#define LDREX "ldxr " -#define STREX "stxr " - -#define str_64 str -#define ldr_64 ldr -#define ldr_32 ldr_w -#define ldrex ldxr -#define strex stxr - -#define fmsr fmov_sw -#define fmrs fmov_ws -#define fconsts fmov_s -#define fconstd fmov_d - -static inline bool is_uimm12(jlong imm, int shift) { - return Assembler::is_unsigned_imm_in_range(imm, 12, shift); -} - -static inline bool is_memoryD(int offset) { - int scale = 3; // LogBytesPerDouble - return is_uimm12(offset, scale); -} - -static inline bool is_memoryfp(int offset) { - int scale = LogBytesPerInt; // include 32-bit word accesses - return is_uimm12(offset, scale); -} - -static inline bool is_memoryI(int offset) { - int scale = LogBytesPerInt; - return is_uimm12(offset, scale); -} - -static inline bool is_memoryP(int offset) { - int scale = LogBytesPerWord; - return is_uimm12(offset, scale); -} - -static inline bool is_memoryHD(int offset) { - int scale = LogBytesPerInt; // include 32-bit word accesses - return is_uimm12(offset, scale); -} - -uintx limmL_low(uintx imm, int n); - -static inline bool Xis_aimm(int imm) { - return Assembler::ArithmeticImmediate(imm).is_encoded(); -} - -static inline bool is_aimm(intptr_t imm) { - return Assembler::ArithmeticImmediate(imm).is_encoded(); -} - -static inline bool is_limmL(uintptr_t imm) { - return Assembler::LogicalImmediate(imm).is_encoded(); -} - -static inline bool is_limmL_low(intptr_t imm, int n) { - return is_limmL(limmL_low(imm, n)); -} - -static inline bool is_limmI(jint imm) { - return Assembler::LogicalImmediate(imm, true).is_encoded(); -} - -static inline uintx limmI_low(jint imm, int n) { - return limmL_low(imm, n); -} - -static inline bool is_limmI_low(jint imm, int n) { - return is_limmL_low(imm, n); -} - -%} - -source %{ - -// Given a register encoding, produce a Integer Register object -static Register reg_to_register_object(int register_encoding) { - assert(R0->encoding() == R_R0_enc && R30->encoding() == R_R30_enc, "right coding"); - assert(Rthread->encoding() == R_R28_enc, "right coding"); - assert(SP->encoding() == R_SP_enc, "right coding"); - return as_Register(register_encoding); -} - -// Given a register encoding, produce a single-precision Float Register object -static FloatRegister reg_to_FloatRegister_object(int register_encoding) { - assert(V0->encoding() == R_V0_enc && V31->encoding() == R_V31_enc, "right coding"); - return as_FloatRegister(register_encoding); -} - -RegMask _INT_REG_mask; -RegMask _PTR_REG_mask; -RegMask _SFLT_REG_mask; -RegMask _DFLT_REG_mask; -RegMask _VECTORX_REG_mask; -RegMask _RMEMCOPY_REG_mask; -RegMask _SP_PTR_REG_mask; -RegMask _SPILLP_REG_mask; -RegMask _STR_REG_mask; -RegMask _STR_PTR_REG_mask; - -OptoReg::Name R_Ricklass_num = -1; -OptoReg::Name R_Rmethod_num = -1; -OptoReg::Name R_tls_num = -1; -OptoReg::Name R_Rtemp_num = -1; -OptoReg::Name R_Rheap_base_num = -1; - -static int mov_oop_size = -1; - -#ifdef ASSERT -static bool same_mask(const RegMask &a, const RegMask &b) { - RegMask a_sub_b = a; a_sub_b.SUBTRACT(b); - RegMask b_sub_a = b; b_sub_a.SUBTRACT(a); - return a_sub_b.Size() == 0 && b_sub_a.Size() == 0; -} -#endif - -void 
Compile::pd_compiler2_init() { - - R_Ricklass_num = OptoReg::as_OptoReg(Ricklass->as_VMReg()); - R_Rmethod_num = OptoReg::as_OptoReg(Rmethod->as_VMReg()); - R_tls_num = OptoReg::as_OptoReg(Rthread->as_VMReg()); - R_Rtemp_num = OptoReg::as_OptoReg(Rtemp->as_VMReg()); - R_Rheap_base_num = OptoReg::as_OptoReg(Rheap_base->as_VMReg()); - - _INT_REG_mask = _INT_REG_ALL_mask; - _INT_REG_mask.Remove(R_tls_num); - _INT_REG_mask.Remove(R_SP_num); - if (UseCompressedOops) { - _INT_REG_mask.Remove(R_Rheap_base_num); - } - // Remove Rtemp because safepoint poll can trash it - // (see SharedRuntime::generate_handler_blob) - _INT_REG_mask.Remove(R_Rtemp_num); - - _PTR_REG_mask = _INT_REG_mask; - _PTR_REG_mask.smear_to_sets(2); - - // STR_REG = INT_REG+ZR - // SPILLP_REG = INT_REG+SP - // SP_PTR_REG = INT_REG+SP+TLS - _STR_REG_mask = _INT_REG_mask; - _SP_PTR_REG_mask = _STR_REG_mask; - _STR_REG_mask.Insert(R_ZR_num); - _SP_PTR_REG_mask.Insert(R_SP_num); - _SPILLP_REG_mask = _SP_PTR_REG_mask; - _SP_PTR_REG_mask.Insert(R_tls_num); - _STR_PTR_REG_mask = _STR_REG_mask; - _STR_PTR_REG_mask.smear_to_sets(2); - _SP_PTR_REG_mask.smear_to_sets(2); - _SPILLP_REG_mask.smear_to_sets(2); - - _RMEMCOPY_REG_mask = RegMask(R_mem_copy_lo_num); -assert(OptoReg::as_OptoReg(Rmemcopy->as_VMReg()) == R_mem_copy_lo_num, "!"); - - _SFLT_REG_mask = _SFLT_REG_0_mask; - _SFLT_REG_mask.SUBTRACT(_RMEMCOPY_REG_mask); - _DFLT_REG_mask = _SFLT_REG_mask; - _DFLT_REG_mask.smear_to_sets(2); - _VECTORX_REG_mask = _SFLT_REG_mask; - _VECTORX_REG_mask.smear_to_sets(4); - assert(same_mask(_VECTORX_REG_mask, _VECTORX_REG_0_mask), "!"); - -#ifdef ASSERT - RegMask r((RegMask *)&SFLT_REG_mask()); - r.smear_to_sets(2); - assert(same_mask(r, _DFLT_REG_mask), "!"); -#endif - - if (VM_Version::prefer_moves_over_load_literal()) { - mov_oop_size = 4; - } else { - mov_oop_size = 1; - } - - assert(Matcher::interpreter_method_oop_reg_encode() == Rmethod->encoding(), "should be"); -} - -uintx limmL_low(uintx imm, int n) { - // 1: try as is - if (is_limmL(imm)) { - return imm; - } - // 2: try low bits + all 0's - uintx imm0 = imm & right_n_bits(n); - if (is_limmL(imm0)) { - return imm0; - } - // 3: try low bits + all 1's - uintx imm1 = imm0 | left_n_bits(BitsPerWord - n); - if (is_limmL(imm1)) { - return imm1; - } -#if 0 - // 4: try low bits replicated - int field = 1 << log2_intptr(n + n - 1); - assert(field >= n, "!"); - assert(field / n == 1, "!"); - intptr_t immr = immx; - while (field < BitsPerWord) { - intrptr_t bits = immr & right_n_bits(field); - immr = bits | (bits << field); - field = field << 1; - } - // replicate at power-of-2 boundary - if (is_limmL(immr)) { - return immr; - } -#endif - return imm; -} - -// Convert the raw encoding form into the form expected by the -// constructor for Address. -Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { - RelocationHolder rspec; - if (disp_reloc != relocInfo::none) { - rspec = Relocation::spec_simple(disp_reloc); - } - - Register rbase = (base == 0xff) ? 
SP : as_Register(base); - if (index != 0xff) { - Register rindex = as_Register(index); - if (disp == 0x7fffffff) { // special value to indicate sign-extend - Address madr(rbase, rindex, ex_sxtw, scale); - madr._rspec = rspec; - return madr; - } else { - assert(disp == 0, "unsupported"); - Address madr(rbase, rindex, ex_lsl, scale); - madr._rspec = rspec; - return madr; - } - } else { - assert(scale == 0, "not supported"); - Address madr(rbase, disp); - madr._rspec = rspec; - return madr; - } -} - -// Location of compiled Java return values. Same as C -OptoRegPair c2::return_value(int ideal_reg) { - assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); - static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_hf_ret_lo_num, R_hf_ret_lo_num, R_R0_num }; - static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, R_R0x_num, OptoReg::Bad, R_hf_ret_hi_num, R_R0x_num }; - return OptoRegPair( hi[ideal_reg], lo[ideal_reg]); -} - -// !!!!! Special hack to get all type of calls to specify the byte offset -// from the start of the call to the point where the return address -// will point. - -int MachCallStaticJavaNode::ret_addr_offset() { - bool far = (_method == NULL) ? maybe_far_call(this) : !cache_reachable(); - bool patchable = _method != NULL; - int call_size = MacroAssembler::call_size(entry_point(), far, patchable); - return (call_size + (_method_handle_invoke ? 1 : 0)) * NativeInstruction::instruction_size; -} - -int MachCallDynamicJavaNode::ret_addr_offset() { - bool far = !cache_reachable(); - int call_size = MacroAssembler::call_size(entry_point(), far, true); - return (mov_oop_size + call_size) * NativeInstruction::instruction_size; -} - -int MachCallRuntimeNode::ret_addr_offset() { - int call_size = 0; - // TODO: check if Leaf nodes also need this - if (!is_MachCallLeaf()) { - // adr $temp, ret_addr - // str $temp, [SP + last_java_pc] - call_size += 2; - } - // bl or mov_slow; blr - bool far = maybe_far_call(this); - call_size += MacroAssembler::call_size(entry_point(), far, false); - return call_size * NativeInstruction::instruction_size; -} - -%} - -// The intptr_t operand types, defined by textual substitution. -// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.) -#define immX immL -#define iRegX iRegL -#define aimmX aimmL -#define limmX limmL -#define immX9 immL9 -#define LShiftX LShiftL -#define shimmX immU6 - -#define store_RegLd store_RegL - -//----------ATTRIBUTES--------------------------------------------------------- -//----------Operand Attributes------------------------------------------------- -op_attrib op_cost(1); // Required cost attribute - -//----------OPERANDS----------------------------------------------------------- -// Operand definitions must precede instruction definitions for correct parsing -// in the ADLC because operands constitute user defined types which are used in -// instruction definitions. - -//----------Simple Operands---------------------------------------------------- -// Immediate Operands - -// Integer Immediate: 9-bit (including sign bit), so same as immI8? -// FIXME: simm9 allows -256, but immI8 doesn't... 
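// ----------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the simm9/uimm12 operand predicates
// defined next delegate to Assembler::is_imm_in_range() and
// Assembler::is_unsigned_imm_in_range(), whose definitions appear further down
// in this change (in the assembler_arm_64.hpp hunk). The standalone
// re-implementation below uses local helper names, not the HotSpot API, and
// only restates the ranges involved: simm9 accepts signed 9-bit values
// [-256, 255]; uimm12 accepts unsigned 12-bit values [0, 4095]; the scaled
// immUL12xN variants additionally require an N-byte multiple that still fits
// in 12 bits after scaling.

#include <cassert>
#include <cstdint>

static int64_t right_n_bits_local(int n) { return (int64_t(1) << n) - 1; }

// Signed immediate of 'bits' bits, aligned to 2^align_bits bytes.
static bool is_imm_in_range_local(int64_t value, int bits, int align_bits) {
  int64_t sign_bits = value >> (bits + align_bits - 1);
  return ((value & right_n_bits_local(align_bits)) == 0) &&
         (sign_bits == 0 || sign_bits == -1);
}

// Unsigned immediate of 'bits' bits, aligned to 2^align_bits bytes.
static bool is_unsigned_imm_in_range_local(int64_t value, int bits, int align_bits) {
  return value >= 0 &&
         (value & right_n_bits_local(align_bits)) == 0 &&
         (value >> (align_bits + bits)) == 0;
}

int main() {
  assert( is_imm_in_range_local(-256, 9, 0));            // simm9 lower bound
  assert( is_imm_in_range_local( 255, 9, 0));            // simm9 upper bound
  assert(!is_imm_in_range_local( 256, 9, 0));            // out of range
  assert( is_unsigned_imm_in_range_local(4095, 12, 0));  // uimm12 upper bound
  assert(!is_unsigned_imm_in_range_local(  -1, 12, 0));  // negative -> rejected
  assert( is_unsigned_imm_in_range_local(4088, 12, 3));  // multiple of 8, cf. immUL12x8
  return 0;
}
// ----------------------------------------------------------------------------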
-operand simm9() %{ - predicate(Assembler::is_imm_in_range(n->get_int(), 9, 0)); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - - -operand uimm12() %{ - predicate(Assembler::is_unsigned_imm_in_range(n->get_int(), 12, 0)); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand aimmP() %{ - predicate(n->get_ptr() == 0 || (is_aimm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none)); - match(ConP); - - op_cost(0); - // formats are generated automatically for constants and base registers - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: 12-bit - for addressing mode -operand immL12() %{ - predicate((-4096 < n->get_long()) && (n->get_long() < 4096)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: 9-bit - for addressing mode -operand immL9() %{ - predicate((-256 <= n->get_long()) && (n->get_long() < 256)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immIMov() %{ - predicate(n->get_int() >> 16 == 0); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immLMov() %{ - predicate(n->get_long() >> 16 == 0); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immUL12() %{ - predicate(is_uimm12(n->get_long(), 0)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immUL12x2() %{ - predicate(is_uimm12(n->get_long(), 1)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immUL12x4() %{ - predicate(is_uimm12(n->get_long(), 2)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immUL12x8() %{ - predicate(is_uimm12(n->get_long(), 3)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immUL12x16() %{ - predicate(is_uimm12(n->get_long(), 4)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Used for long shift -operand immU6() %{ - predicate(0 <= n->get_int() && (n->get_int() <= 63)); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Used for register extended shift -operand immI_0_4() %{ - predicate(0 <= n->get_int() && (n->get_int() <= 4)); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Compressed Pointer Register -operand iRegN() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegN); - match(ZRRegN); - - format %{ %} - interface(REG_INTER); -%} - -operand SPRegP() %{ - constraint(ALLOC_IN_RC(SP_regP)); - match(RegP); - - format %{ %} - interface(REG_INTER); -%} - -operand ZRRegP() %{ - constraint(ALLOC_IN_RC(ZR_regP)); - match(RegP); - - format %{ %} - interface(REG_INTER); -%} - -operand ZRRegL() %{ - constraint(ALLOC_IN_RC(ZR_regP)); - match(RegL); - - format %{ %} - interface(REG_INTER); -%} - -operand ZRRegI() %{ - constraint(ALLOC_IN_RC(ZR_regI)); - match(RegI); - - format %{ %} - interface(REG_INTER); -%} - -operand ZRRegN() %{ - constraint(ALLOC_IN_RC(ZR_regI)); - match(RegN); - - format %{ %} - interface(REG_INTER); -%} --- old/src/hotspot/cpu/arm/assembler_arm_64.cpp 2018-09-17 10:31:11.115765345 -0400 +++ /dev/null 2018-04-28 00:25:57.886812021 -0400 @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "ci/ciEnv.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" -#include "gc/shared/collectedHeap.inline.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/templateInterpreterGenerator.hpp" -#include "memory/resourceArea.hpp" -#include "prims/jvm_misc.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" -#include "runtime/interfaceSupport.inline.hpp" -#include "runtime/objectMonitor.hpp" -#include "runtime/os.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/hashtable.hpp" -#include "utilities/macros.hpp" - -// Returns whether given imm has equal bit fields <0:size-1> and . -inline bool Assembler::LogicalImmediate::has_equal_subpatterns(uintx imm, int size) { - uintx mask = right_n_bits(size); - uintx subpattern1 = mask_bits(imm, mask); - uintx subpattern2 = mask_bits(imm >> size, mask); - return subpattern1 == subpattern2; -} - -// Returns least size that is a power of two from 2 to 64 with the proviso that given -// imm is composed of repeating patterns of this size. -inline int Assembler::LogicalImmediate::least_pattern_size(uintx imm) { - int size = BitsPerWord; - while (size > 2 && has_equal_subpatterns(imm, size >> 1)) { - size >>= 1; - } - return size; -} - -// Returns count of set bits in given imm. Based on variable-precision SWAR algorithm. -inline int Assembler::LogicalImmediate::population_count(uintx x) { - x -= ((x >> 1) & 0x5555555555555555L); - x = (((x >> 2) & 0x3333333333333333L) + (x & 0x3333333333333333L)); - x = (((x >> 4) + x) & 0x0f0f0f0f0f0f0f0fL); - x += (x >> 8); - x += (x >> 16); - x += (x >> 32); - return(x & 0x7f); -} - -// Let given x be where B = 0 and least bit of A = 1. Returns , where C is B-size set bits. -inline uintx Assembler::LogicalImmediate::set_least_zeroes(uintx x) { - return x | (x - 1); -} - - -#ifdef ASSERT - -// Restores immediate by encoded bit masks. -uintx Assembler::LogicalImmediate::decode() { - assert (_encoded, "should be"); - - int len_code = (_immN << 6) | ((~_imms) & 0x3f); - assert (len_code != 0, "should be"); - - int len = 6; - while (!is_set_nth_bit(len_code, len)) len--; - int esize = 1 << len; - assert (len > 0, "should be"); - assert ((_is32bit ? 32 : 64) >= esize, "should be"); - - int levels = right_n_bits(len); - int S = _imms & levels; - int R = _immr & levels; - - assert (S != levels, "should be"); - - uintx welem = right_n_bits(S + 1); - uintx wmask = (R == 0) ? 
welem : ((welem >> R) | (welem << (esize - R))); - - for (int size = esize; size < 64; size <<= 1) { - wmask |= (wmask << size); - } - - return wmask; -} - -#endif - - -// Constructs LogicalImmediate by given imm. Figures out if given imm can be used in AArch64 logical -// instructions (AND, ANDS, EOR, ORR) and saves its encoding. -void Assembler::LogicalImmediate::construct(uintx imm, bool is32) { - _is32bit = is32; - - if (is32) { - assert(((imm >> 32) == 0) || (((intx)imm >> 31) == -1), "32-bit immediate is out of range"); - - // Replicate low 32 bits. - imm &= 0xffffffff; - imm |= imm << 32; - } - - // All-zeroes and all-ones can not be encoded. - if (imm != 0 && (~imm != 0)) { - - // Let LPS (least pattern size) be the least size (power of two from 2 to 64) of repeating - // patterns in the immediate. If immediate value can be encoded, it is encoded by pattern - // of exactly LPS size (due to structure of valid patterns). In order to verify - // that immediate value can be encoded, LPS is calculated and bits of immediate - // are verified to be valid pattern. - int lps = least_pattern_size(imm); - uintx lps_mask = right_n_bits(lps); - - // A valid pattern has one of the following forms: - // | 0 x A | 1 x B | 0 x C |, where B > 0 and C > 0, or - // | 1 x A | 0 x B | 1 x C |, where B > 0 and C > 0. - // For simplicity, the second form of the pattern is inverted into the first form. - bool inverted = imm & 0x1; - uintx pattern = (inverted ? ~imm : imm) & lps_mask; - - // | 0 x A | 1 x (B + C) | - uintx without_least_zeroes = set_least_zeroes(pattern); - - // Pattern is valid iff without least zeroes it is a power of two - 1. - if ((without_least_zeroes & (without_least_zeroes + 1)) == 0) { - - // Count B as population count of pattern. - int bits_count = population_count(pattern); - - // Count B+C as population count of pattern without least zeroes - int left_range = population_count(without_least_zeroes); - - // S-prefix is a part of imms field which encodes LPS. - // LPS | S prefix - // 64 | not defined - // 32 | 0b0 - // 16 | 0b10 - // 8 | 0b110 - // 4 | 0b1110 - // 2 | 0b11110 - int s_prefix = (lps == 64) ? 0 : ~set_least_zeroes(lps) & 0x3f; - - // immN bit is set iff LPS == 64. - _immN = (lps == 64) ? 1 : 0; - assert (!is32 || (_immN == 0), "32-bit immediate should be encoded with zero N-bit"); - - // immr is the rotation size. - _immr = lps + (inverted ? 0 : bits_count) - left_range; - - // imms is the field that encodes bits count and S-prefix. - _imms = ((inverted ? (lps - bits_count) : bits_count) - 1) | s_prefix; - - _encoded = true; - assert (decode() == imm, "illegal encoding"); - - return; - } - } - - _encoded = false; -} --- old/src/hotspot/cpu/arm/assembler_arm_64.hpp 2018-09-17 10:31:12.245831349 -0400 +++ /dev/null 2018-04-28 00:25:57.886812021 -0400 @@ -1,1718 +0,0 @@ -/* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_ARM_VM_ASSEMBLER_ARM_64_HPP -#define CPU_ARM_VM_ASSEMBLER_ARM_64_HPP - -enum AsmShift12 { - lsl0, lsl12 -}; - -enum AsmPrefetchOp { - pldl1keep = 0b00000, - pldl1strm, - pldl2keep, - pldl2strm, - pldl3keep, - pldl3strm, - - plil1keep = 0b01000, - plil1strm, - plil2keep, - plil2strm, - plil3keep, - plil3strm, - - pstl1keep = 0b10000, - pstl1strm, - pstl2keep, - pstl2strm, - pstl3keep, - pstl3strm, -}; - -// Shifted register operand for data processing instructions. -class AsmOperand { - private: - Register _reg; - AsmShift _shift; - int _shift_imm; - - public: - AsmOperand(Register reg) { - assert(reg != SP, "SP is not allowed in shifted register operand"); - _reg = reg; - _shift = lsl; - _shift_imm = 0; - } - - AsmOperand(Register reg, AsmShift shift, int shift_imm) { - assert(reg != SP, "SP is not allowed in shifted register operand"); - assert(shift_imm >= 0, "shift amount should be non-negative"); - _reg = reg; - _shift = shift; - _shift_imm = shift_imm; - } - - Register reg() const { - return _reg; - } - - AsmShift shift() const { - return _shift; - } - - int shift_imm() const { - return _shift_imm; - } -}; - - -class Assembler : public AbstractAssembler { - - public: - - static const int LogInstructionSize = 2; - static const int InstructionSize = 1 << LogInstructionSize; - - Assembler(CodeBuffer* code) : AbstractAssembler(code) {} - - static inline AsmCondition inverse(AsmCondition cond) { - assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed"); - return (AsmCondition)((int)cond ^ 1); - } - - // Returns value of nzcv flags conforming to the given condition. - static inline int flags_for_condition(AsmCondition cond) { - switch(cond) { // NZCV - case mi: case lt: return 0b1000; - case eq: case le: return 0b0100; - case hs: case hi: return 0b0010; - case vs: return 0b0001; - default: return 0b0000; - } - } - - // Immediate, encoded into logical instructions. - class LogicalImmediate { - private: - bool _encoded; - bool _is32bit; - int _immN; - int _immr; - int _imms; - - static inline bool has_equal_subpatterns(uintx imm, int size); - static inline int least_pattern_size(uintx imm); - static inline int population_count(uintx x); - static inline uintx set_least_zeroes(uintx x); - -#ifdef ASSERT - uintx decode(); -#endif - - void construct(uintx imm, bool is32); - - public: - LogicalImmediate(uintx imm, bool is32 = false) { construct(imm, is32); } - - // Returns true if given immediate can be used in AArch64 logical instruction. - bool is_encoded() const { return _encoded; } - - bool is32bit() const { return _is32bit; } - int immN() const { assert(_encoded, "should be"); return _immN; } - int immr() const { assert(_encoded, "should be"); return _immr; } - int imms() const { assert(_encoded, "should be"); return _imms; } - }; - - // Immediate, encoded into arithmetic add/sub instructions. 
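// ----------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): LogicalImmediate above models the
// AArch64 "bitmask" immediates accepted by AND/ORR/EOR/ANDS -- a rotated run
// of ones replicated across a 2/4/8/16/32/64-bit element, so all-zeroes and
// all-ones are never encodable -- while ArithmeticImmediate below models
// ADD/SUB/CMP/CMN immediates: an unsigned 12-bit value, optionally shifted
// left by 12. The standalone function below restates only the simpler
// arithmetic rule (local name, not the HotSpot API) to make the distinction
// concrete.

#include <cassert>
#include <cstdint>

// True iff x fits the AArch64 add/sub immediate format: uimm12, or uimm12 << 12.
static bool fits_addsub_immediate(uint64_t x) {
  return (x & ~UINT64_C(0xfff)) == 0 ||          // 12-bit, LSL #0
         (x & ~(UINT64_C(0xfff) << 12)) == 0;    // 12-bit, LSL #12
}

int main() {
  assert( fits_addsub_immediate(0x123));     // plain 12-bit value
  assert( fits_addsub_immediate(0x123000));  // same value, LSL #12
  assert(!fits_addsub_immediate(0x123456));  // needs both halves -> two instructions
  // The opposite case: 0xf0f0f0f0f0f0f0f0 is not an add/sub immediate, but it
  // is a valid logical immediate (a run of four ones in an 8-bit pattern).
  assert(!fits_addsub_immediate(UINT64_C(0xf0f0f0f0f0f0f0f0)));
  return 0;
}
// ----------------------------------------------------------------------------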
- class ArithmeticImmediate { - private: - bool _encoded; - int _imm; - AsmShift12 _shift; - - public: - ArithmeticImmediate(intx x) { - if (is_unsigned_imm_in_range(x, 12, 0)) { - _encoded = true; - _imm = x; - _shift = lsl0; - } else if (is_unsigned_imm_in_range(x, 12, 12)) { - _encoded = true; - _imm = x >> 12; - _shift = lsl12; - } else { - _encoded = false; - } - } - - ArithmeticImmediate(intx x, AsmShift12 sh) { - if (is_unsigned_imm_in_range(x, 12, 0)) { - _encoded = true; - _imm = x; - _shift = sh; - } else { - _encoded = false; - } - } - - // Returns true if this immediate can be used in AArch64 arithmetic (add/sub/cmp/cmn) instructions. - bool is_encoded() const { return _encoded; } - - int imm() const { assert(_encoded, "should be"); return _imm; } - AsmShift12 shift() const { assert(_encoded, "should be"); return _shift; } - }; - - static inline bool is_imm_in_range(intx value, int bits, int align_bits) { - intx sign_bits = (value >> (bits + align_bits - 1)); - return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1)); - } - - static inline int encode_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) { - assert (is_imm_in_range(value, bits, align_bits), "immediate value is out of range"); - return ((value >> align_bits) & right_n_bits(bits)) << low_bit_in_encoding; - } - - static inline bool is_unsigned_imm_in_range(intx value, int bits, int align_bits) { - return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0); - } - - static inline int encode_unsigned_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) { - assert (is_unsigned_imm_in_range(value, bits, align_bits), "immediate value is out of range"); - return (value >> align_bits) << low_bit_in_encoding; - } - - static inline bool is_offset_in_range(intx offset, int bits) { - assert (bits == 14 || bits == 19 || bits == 26, "wrong bits number"); - return is_imm_in_range(offset, bits, 2); - } - - static inline int encode_offset(intx offset, int bits, int low_bit_in_encoding) { - return encode_imm(offset, bits, 2, low_bit_in_encoding); - } - - // Returns true if given value can be used as immediate in arithmetic (add/sub/cmp/cmn) instructions. 
- static inline bool is_arith_imm_in_range(intx value) { - return ArithmeticImmediate(value).is_encoded(); - } - - - // Load/store instructions - -#define F(mnemonic, opc) \ - void mnemonic(Register rd, address literal_addr) { \ - intx offset = literal_addr - pc(); \ - assert (opc != 0b01 || offset == 0 || ((uintx)literal_addr & 7) == 0, "ldr target should be aligned"); \ - assert (is_offset_in_range(offset, 19), "offset is out of range"); \ - emit_int32(opc << 30 | 0b011 << 27 | encode_offset(offset, 19, 5) | rd->encoding_with_zr()); \ - } - - F(ldr_w, 0b00) - F(ldr, 0b01) - F(ldrsw, 0b10) -#undef F - -#define F(mnemonic, opc) \ - void mnemonic(FloatRegister rt, address literal_addr) { \ - intx offset = literal_addr - pc(); \ - assert (offset == 0 || ((uintx)literal_addr & right_n_bits(2 + opc)) == 0, "ldr target should be aligned"); \ - assert (is_offset_in_range(offset, 19), "offset is out of range"); \ - emit_int32(opc << 30 | 0b011100 << 24 | encode_offset(offset, 19, 5) | rt->encoding()); \ - } - - F(ldr_s, 0b00) - F(ldr_d, 0b01) - F(ldr_q, 0b10) -#undef F - -#define F(mnemonic, size, o2, L, o1, o0) \ - void mnemonic(Register rt, Register rn) { \ - emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \ - o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(ldxrb, 0b00, 0, 1, 0, 0) - F(ldaxrb, 0b00, 0, 1, 0, 1) - F(ldarb, 0b00, 1, 1, 0, 1) - F(ldxrh, 0b01, 0, 1, 0, 0) - F(ldaxrh, 0b01, 0, 1, 0, 1) - F(ldarh, 0b01, 1, 1, 0, 1) - F(ldxr_w, 0b10, 0, 1, 0, 0) - F(ldaxr_w, 0b10, 0, 1, 0, 1) - F(ldar_w, 0b10, 1, 1, 0, 1) - F(ldxr, 0b11, 0, 1, 0, 0) - F(ldaxr, 0b11, 0, 1, 0, 1) - F(ldar, 0b11, 1, 1, 0, 1) - - F(stlrb, 0b00, 1, 0, 0, 1) - F(stlrh, 0b01, 1, 0, 0, 1) - F(stlr_w, 0b10, 1, 0, 0, 1) - F(stlr, 0b11, 1, 0, 0, 1) -#undef F - -#define F(mnemonic, size, o2, L, o1, o0) \ - void mnemonic(Register rs, Register rt, Register rn) { \ - assert (rs != rt, "should be different"); \ - assert (rs != rn, "should be different"); \ - emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \ - o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(stxrb, 0b00, 0, 0, 0, 0) - F(stlxrb, 0b00, 0, 0, 0, 1) - F(stxrh, 0b01, 0, 0, 0, 0) - F(stlxrh, 0b01, 0, 0, 0, 1) - F(stxr_w, 0b10, 0, 0, 0, 0) - F(stlxr_w, 0b10, 0, 0, 0, 1) - F(stxr, 0b11, 0, 0, 0, 0) - F(stlxr, 0b11, 0, 0, 0, 1) -#undef F - -#define F(mnemonic, size, o2, L, o1, o0) \ - void mnemonic(Register rt, Register rt2, Register rn) { \ - assert (rt != rt2, "should be different"); \ - emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \ - o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(ldxp_w, 0b10, 0, 1, 1, 0) - F(ldaxp_w, 0b10, 0, 1, 1, 1) - F(ldxp, 0b11, 0, 1, 1, 0) - F(ldaxp, 0b11, 0, 1, 1, 1) -#undef F - -#define F(mnemonic, size, o2, L, o1, o0) \ - void mnemonic(Register rs, Register rt, Register rt2, Register rn) { \ - assert (rs != rt, "should be different"); \ - assert (rs != rt2, "should be different"); \ - assert (rs != rn, "should be different"); \ - emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \ - o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(stxp_w, 0b10, 0, 0, 1, 0) - F(stlxp_w, 0b10, 0, 0, 1, 1) - F(stxp, 0b11, 0, 0, 1, 0) - F(stlxp, 0b11, 0, 0, 1, 1) 
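// ----------------------------------------------------------------------------
// Illustrative sketch (not HotSpot code): the exclusive-access emitters above
// (ldxr/ldaxr, stxr/stlxr and the pair forms) are the building blocks for
// atomic read-modify-write sequences. The standalone program below shows the
// equivalent semantics with std::atomic; on AArch64 without LSE atomics a
// compare-exchange loop typically lowers to the same
// ldaxr / cmp / stlxr / cbnz-retry pattern these instructions enable.

#include <atomic>
#include <cassert>
#include <cstdint>

static bool cas64(std::atomic<uint64_t>& slot, uint64_t expected, uint64_t desired) {
  // compare_exchange_strong retries internally on spurious store-exclusive
  // failures, mirroring the cbnz-back-to-ldaxr loop a JIT emits by hand.
  return slot.compare_exchange_strong(expected, desired,
                                      std::memory_order_acq_rel,
                                      std::memory_order_acquire);
}

int main() {
  std::atomic<uint64_t> slot{42};
  assert( cas64(slot, 42, 7));   // succeeds: observed the expected value
  assert(!cas64(slot, 42, 9));   // fails: slot now holds 7
  assert(slot.load() == 7);
  return 0;
}
// ----------------------------------------------------------------------------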
-#undef F - -#define F(mnemonic, opc, V, L) \ - void mnemonic(Register rt, Register rt2, Register rn, int offset = 0) { \ - assert (!L || rt != rt2, "should be different"); \ - int align_bits = 2 + (opc >> 1); \ - assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \ - emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \ - rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(stnp_w, 0b00, 0, 0) - F(ldnp_w, 0b00, 0, 1) - F(stnp, 0b10, 0, 0) - F(ldnp, 0b10, 0, 1) -#undef F - -#define F(mnemonic, opc, V, L) \ - void mnemonic(FloatRegister rt, FloatRegister rt2, Register rn, int offset = 0) { \ - assert (!L || (rt != rt2), "should be different"); \ - int align_bits = 2 + opc; \ - assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \ - emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \ - rt2->encoding() << 10 | rn->encoding_with_sp() << 5 | rt->encoding()); \ - } - - F(stnp_s, 0b00, 1, 0) - F(stnp_d, 0b01, 1, 0) - F(stnp_q, 0b10, 1, 0) - F(ldnp_s, 0b00, 1, 1) - F(ldnp_d, 0b01, 1, 1) - F(ldnp_q, 0b10, 1, 1) -#undef F - -#define F(mnemonic, size, V, opc) \ - void mnemonic(Register rt, Address addr) { \ - assert((addr.mode() == basic_offset) || (rt != addr.base()), "should be different"); \ - if (addr.index() == noreg) { \ - if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, size)) { \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ - encode_unsigned_imm(addr.disp(), 12, size, 10) | \ - addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } else { \ - assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ - addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } \ - } else { \ - assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \ - assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ - addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ - 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } \ - } - - F(strb, 0b00, 0, 0b00) - F(ldrb, 0b00, 0, 0b01) - F(ldrsb, 0b00, 0, 0b10) - F(ldrsb_w, 0b00, 0, 0b11) - - F(strh, 0b01, 0, 0b00) - F(ldrh, 0b01, 0, 0b01) - F(ldrsh, 0b01, 0, 0b10) - F(ldrsh_w, 0b01, 0, 0b11) - - F(str_w, 0b10, 0, 0b00) - F(ldr_w, 0b10, 0, 0b01) - F(ldrsw, 0b10, 0, 0b10) - - F(str, 0b11, 0, 0b00) - F(ldr, 0b11, 0, 0b01) -#undef F - -#define F(mnemonic, size, V, opc) \ - void mnemonic(AsmPrefetchOp prfop, Address addr) { \ - assert (addr.mode() == basic_offset, #mnemonic " supports only basic_offset address mode"); \ - if (addr.index() == noreg) { \ - if (is_unsigned_imm_in_range(addr.disp(), 12, size)) { \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ - encode_unsigned_imm(addr.disp(), 12, size, 10) | \ - addr.base()->encoding_with_sp() << 5 | prfop); \ - } else { \ - assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ - addr.base()->encoding_with_sp() << 5 | prfop); \ - } \ - } else { \ - assert (addr.disp() == 0, 
"non-zero displacement for [reg + reg] address mode"); \ - assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ - addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ - 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | prfop); \ - } \ - } - - F(prfm, 0b11, 0, 0b10) -#undef F - -#define F(mnemonic, size, V, opc) \ - void mnemonic(FloatRegister rt, Address addr) { \ - int align_bits = (((opc & 0b10) >> 1) << 2) | size; \ - if (addr.index() == noreg) { \ - if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, align_bits)) { \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ - encode_unsigned_imm(addr.disp(), 12, align_bits, 10) | \ - addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ - } else { \ - assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ - addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ - } \ - } else { \ - assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \ - assert ((addr.shift_imm() == 0) || (addr.shift_imm() == align_bits), "invalid shift amount"); \ - emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ - addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ - 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ - } \ - } - - F(str_b, 0b00, 1, 0b00) - F(ldr_b, 0b00, 1, 0b01) - F(str_h, 0b01, 1, 0b00) - F(ldr_h, 0b01, 1, 0b01) - F(str_s, 0b10, 1, 0b00) - F(ldr_s, 0b10, 1, 0b01) - F(str_d, 0b11, 1, 0b00) - F(ldr_d, 0b11, 1, 0b01) - F(str_q, 0b00, 1, 0b10) - F(ldr_q, 0b00, 1, 0b11) -#undef F - -#define F(mnemonic, opc, V, L) \ - void mnemonic(Register rt, Register rt2, Address addr) { \ - assert((addr.mode() == basic_offset) || ((rt != addr.base()) && (rt2 != addr.base())), "should be different"); \ - assert(!L || (rt != rt2), "should be different"); \ - assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \ - int align_bits = 2 + (opc >> 1); \ - int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode(); \ - assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \ - emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \ - encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding_with_zr() << 10 | \ - addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ - } - - F(stp_w, 0b00, 0, 0) - F(ldp_w, 0b00, 0, 1) - F(ldpsw, 0b01, 0, 1) - F(stp, 0b10, 0, 0) - F(ldp, 0b10, 0, 1) -#undef F - -#define F(mnemonic, opc, V, L) \ - void mnemonic(FloatRegister rt, FloatRegister rt2, Address addr) { \ - assert(!L || (rt != rt2), "should be different"); \ - assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \ - int align_bits = 2 + opc; \ - int mode_encoding = (addr.mode() == basic_offset) ? 
0b10 : addr.mode(); \ - assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \ - emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \ - encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding() << 10 | \ - addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ - } - - F(stp_s, 0b00, 1, 0) - F(ldp_s, 0b00, 1, 1) - F(stp_d, 0b01, 1, 0) - F(ldp_d, 0b01, 1, 1) - F(stp_q, 0b10, 1, 0) - F(ldp_q, 0b10, 1, 1) -#undef F - - // Data processing instructions - -#define F(mnemonic, sf, opc) \ - void mnemonic(Register rd, Register rn, const LogicalImmediate& imm) { \ - assert (imm.is_encoded(), "illegal immediate for logical instruction"); \ - assert (imm.is32bit() == (sf == 0), "immediate size does not match instruction size"); \ - emit_int32(sf << 31 | opc << 29 | 0b100100 << 23 | imm.immN() << 22 | imm.immr() << 16 | \ - imm.imms() << 10 | rn->encoding_with_zr() << 5 | \ - ((opc == 0b11) ? rd->encoding_with_zr() : rd->encoding_with_sp())); \ - } \ - void mnemonic(Register rd, Register rn, uintx imm) { \ - LogicalImmediate limm(imm, (sf == 0)); \ - mnemonic(rd, rn, limm); \ - } \ - void mnemonic(Register rd, Register rn, unsigned int imm) { \ - mnemonic(rd, rn, (uintx)imm); \ - } - - F(andr_w, 0, 0b00) - F(orr_w, 0, 0b01) - F(eor_w, 0, 0b10) - F(ands_w, 0, 0b11) - - F(andr, 1, 0b00) - F(orr, 1, 0b01) - F(eor, 1, 0b10) - F(ands, 1, 0b11) -#undef F - - void tst(Register rn, unsigned int imm) { - ands(ZR, rn, imm); - } - - void tst_w(Register rn, unsigned int imm) { - ands_w(ZR, rn, imm); - } - -#define F(mnemonic, sf, opc, N) \ - void mnemonic(Register rd, Register rn, AsmOperand operand) { \ - assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); \ - emit_int32(sf << 31 | opc << 29 | 0b01010 << 24 | operand.shift() << 22 | N << 21 | \ - operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \ - rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(andr_w, 0, 0b00, 0) - F(bic_w, 0, 0b00, 1) - F(orr_w, 0, 0b01, 0) - F(orn_w, 0, 0b01, 1) - F(eor_w, 0, 0b10, 0) - F(eon_w, 0, 0b10, 1) - F(ands_w, 0, 0b11, 0) - F(bics_w, 0, 0b11, 1) - - F(andr, 1, 0b00, 0) - F(bic, 1, 0b00, 1) - F(orr, 1, 0b01, 0) - F(orn, 1, 0b01, 1) - F(eor, 1, 0b10, 0) - F(eon, 1, 0b10, 1) - F(ands, 1, 0b11, 0) - F(bics, 1, 0b11, 1) -#undef F - - void tst(Register rn, AsmOperand operand) { - ands(ZR, rn, operand); - } - - void tst_w(Register rn, AsmOperand operand) { - ands_w(ZR, rn, operand); - } - - void mvn(Register rd, AsmOperand operand) { - orn(rd, ZR, operand); - } - - void mvn_w(Register rd, AsmOperand operand) { - orn_w(rd, ZR, operand); - } - -#define F(mnemonic, sf, op, S) \ - void mnemonic(Register rd, Register rn, const ArithmeticImmediate& imm) { \ - assert(imm.is_encoded(), "immediate is out of range"); \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b10001 << 24 | imm.shift() << 22 | \ - imm.imm() << 10 | rn->encoding_with_sp() << 5 | \ - (S == 1 ? 
rd->encoding_with_zr() : rd->encoding_with_sp())); \ - } \ - void mnemonic(Register rd, Register rn, int imm) { \ - mnemonic(rd, rn, ArithmeticImmediate(imm)); \ - } \ - void mnemonic(Register rd, Register rn, int imm, AsmShift12 shift) { \ - mnemonic(rd, rn, ArithmeticImmediate(imm, shift)); \ - } \ - void mnemonic(Register rd, Register rn, Register rm, AsmExtendOp extend, int shift_imm = 0) { \ - assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range"); \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011001 << 21 | rm->encoding_with_zr() << 16 | \ - extend << 13 | shift_imm << 10 | rn->encoding_with_sp() << 5 | \ - (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp())); \ - } \ - void mnemonic(Register rd, Register rn, AsmOperand operand) { \ - assert (operand.shift() != ror, "illegal shift type"); \ - assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011 << 24 | operand.shift() << 22 | \ - operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \ - rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(add_w, 0, 0, 0) - F(adds_w, 0, 0, 1) - F(sub_w, 0, 1, 0) - F(subs_w, 0, 1, 1) - - F(add, 1, 0, 0) - F(adds, 1, 0, 1) - F(sub, 1, 1, 0) - F(subs, 1, 1, 1) -#undef F - - void mov(Register rd, Register rm) { - if ((rd == SP) || (rm == SP)) { - add(rd, rm, 0); - } else { - orr(rd, ZR, rm); - } - } - - void mov_w(Register rd, Register rm) { - if ((rd == SP) || (rm == SP)) { - add_w(rd, rm, 0); - } else { - orr_w(rd, ZR, rm); - } - } - - void cmp(Register rn, int imm) { - subs(ZR, rn, imm); - } - - void cmp_w(Register rn, int imm) { - subs_w(ZR, rn, imm); - } - - void cmp(Register rn, Register rm) { - assert (rm != SP, "SP should not be used as the 2nd operand of cmp"); - if (rn == SP) { - subs(ZR, rn, rm, ex_uxtx); - } else { - subs(ZR, rn, rm); - } - } - - void cmp_w(Register rn, Register rm) { - assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp"); - subs_w(ZR, rn, rm); - } - - void cmp(Register rn, AsmOperand operand) { - assert (rn != SP, "SP is not allowed in cmp with shifted register (AsmOperand)"); - subs(ZR, rn, operand); - } - - void cmn(Register rn, int imm) { - adds(ZR, rn, imm); - } - - void cmn_w(Register rn, int imm) { - adds_w(ZR, rn, imm); - } - - void cmn(Register rn, Register rm) { - assert (rm != SP, "SP should not be used as the 2nd operand of cmp"); - if (rn == SP) { - adds(ZR, rn, rm, ex_uxtx); - } else { - adds(ZR, rn, rm); - } - } - - void cmn_w(Register rn, Register rm) { - assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp"); - adds_w(ZR, rn, rm); - } - - void neg(Register rd, Register rm) { - sub(rd, ZR, rm); - } - - void neg_w(Register rd, Register rm) { - sub_w(rd, ZR, rm); - } - -#define F(mnemonic, sf, op, S) \ - void mnemonic(Register rd, Register rn, Register rm) { \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010000 << 21 | rm->encoding_with_zr() << 16 | \ - rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(adc_w, 0, 0, 0) - F(adcs_w, 0, 0, 1) - F(sbc_w, 0, 1, 0) - F(sbcs_w, 0, 1, 1) - - F(adc, 1, 0, 0) - F(adcs, 1, 0, 1) - F(sbc, 1, 1, 0) - F(sbcs, 1, 1, 1) -#undef F - -#define F(mnemonic, sf, N) \ - void mnemonic(Register rd, Register rn, Register rm, int lsb) { \ - assert ((lsb >> (5 + sf)) == 0, "illegal least significant bit position"); \ - emit_int32(sf << 31 | 0b100111 << 23 | N << 22 | rm->encoding_with_zr() << 16 | \ - lsb << 10 | rn->encoding_with_zr() << 5 | 
rd->encoding_with_zr()); \ - } - - F(extr_w, 0, 0) - F(extr, 1, 1) -#undef F - -#define F(mnemonic, sf, opc) \ - void mnemonic(Register rd, int imm, int shift) { \ - assert ((imm >> 16) == 0, "immediate is out of range"); \ - assert (((shift & 0xf) == 0) && ((shift >> (5 + sf)) == 0), "invalid shift"); \ - emit_int32(sf << 31 | opc << 29 | 0b100101 << 23 | (shift >> 4) << 21 | \ - imm << 5 | rd->encoding_with_zr()); \ - } - - F(movn_w, 0, 0b00) - F(movz_w, 0, 0b10) - F(movk_w, 0, 0b11) - F(movn, 1, 0b00) - F(movz, 1, 0b10) - F(movk, 1, 0b11) -#undef F - - void mov(Register rd, int imm) { - assert ((imm >> 16) == 0, "immediate is out of range"); - movz(rd, imm, 0); - } - - void mov_w(Register rd, int imm) { - assert ((imm >> 16) == 0, "immediate is out of range"); - movz_w(rd, imm, 0); - } - -#define F(mnemonic, sf, op, S) \ - void mnemonic(Register rn, int imm, int nzcv, AsmCondition cond) { \ - assert ((imm >> 5) == 0, "immediate is out of range"); \ - assert ((nzcv >> 4) == 0, "illegal nzcv"); \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | imm << 16 | \ - cond << 12 | 1 << 11 | rn->encoding_with_zr() << 5 | nzcv); \ - } - - F(ccmn_w, 0, 0, 1) - F(ccmp_w, 0, 1, 1) - F(ccmn, 1, 0, 1) - F(ccmp, 1, 1, 1) -#undef F - -#define F(mnemonic, sf, op, S) \ - void mnemonic(Register rn, Register rm, int nzcv, AsmCondition cond) { \ - assert ((nzcv >> 4) == 0, "illegal nzcv"); \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | rm->encoding_with_zr() << 16 | \ - cond << 12 | rn->encoding_with_zr() << 5 | nzcv); \ - } - - F(ccmn_w, 0, 0, 1) - F(ccmp_w, 0, 1, 1) - F(ccmn, 1, 0, 1) - F(ccmp, 1, 1, 1) -#undef F - -#define F(mnemonic, sf, op, S, op2) \ - void mnemonic(Register rd, Register rn, Register rm, AsmCondition cond) { \ - emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010100 << 21 | rm->encoding_with_zr() << 16 | \ - cond << 12 | op2 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(csel_w, 0, 0, 0, 0b00) - F(csinc_w, 0, 0, 0, 0b01) - F(csinv_w, 0, 1, 0, 0b00) - F(csneg_w, 0, 1, 0, 0b01) - - F(csel, 1, 0, 0, 0b00) - F(csinc, 1, 0, 0, 0b01) - F(csinv, 1, 1, 0, 0b00) - F(csneg, 1, 1, 0, 0b01) -#undef F - - void cset(Register rd, AsmCondition cond) { - csinc(rd, ZR, ZR, inverse(cond)); - } - - void cset_w(Register rd, AsmCondition cond) { - csinc_w(rd, ZR, ZR, inverse(cond)); - } - - void csetm(Register rd, AsmCondition cond) { - csinv(rd, ZR, ZR, inverse(cond)); - } - - void csetm_w(Register rd, AsmCondition cond) { - csinv_w(rd, ZR, ZR, inverse(cond)); - } - - void cinc(Register rd, Register rn, AsmCondition cond) { - csinc(rd, rn, rn, inverse(cond)); - } - - void cinc_w(Register rd, Register rn, AsmCondition cond) { - csinc_w(rd, rn, rn, inverse(cond)); - } - - void cinv(Register rd, Register rn, AsmCondition cond) { - csinv(rd, rn, rn, inverse(cond)); - } - - void cinv_w(Register rd, Register rn, AsmCondition cond) { - csinv_w(rd, rn, rn, inverse(cond)); - } - -#define F(mnemonic, sf, S, opcode) \ - void mnemonic(Register rd, Register rn) { \ - emit_int32(sf << 31 | 1 << 30 | S << 29 | 0b11010110 << 21 | opcode << 10 | \ - rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(rbit_w, 0, 0, 0b000000) - F(rev16_w, 0, 0, 0b000001) - F(rev_w, 0, 0, 0b000010) - F(clz_w, 0, 0, 0b000100) - F(cls_w, 0, 0, 0b000101) - - F(rbit, 1, 0, 0b000000) - F(rev16, 1, 0, 0b000001) - F(rev32, 1, 0, 0b000010) - F(rev, 1, 0, 0b000011) - F(clz, 1, 0, 0b000100) - F(cls, 1, 0, 0b000101) -#undef F - -#define F(mnemonic, sf, S, opcode) \ - void 
mnemonic(Register rd, Register rn, Register rm) { \ - emit_int32(sf << 31 | S << 29 | 0b11010110 << 21 | rm->encoding_with_zr() << 16 | \ - opcode << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(udiv_w, 0, 0, 0b000010) - F(sdiv_w, 0, 0, 0b000011) - F(lslv_w, 0, 0, 0b001000) - F(lsrv_w, 0, 0, 0b001001) - F(asrv_w, 0, 0, 0b001010) - F(rorv_w, 0, 0, 0b001011) - - F(udiv, 1, 0, 0b000010) - F(sdiv, 1, 0, 0b000011) - F(lslv, 1, 0, 0b001000) - F(lsrv, 1, 0, 0b001001) - F(asrv, 1, 0, 0b001010) - F(rorv, 1, 0, 0b001011) -#undef F - -#define F(mnemonic, sf, op31, o0) \ - void mnemonic(Register rd, Register rn, Register rm, Register ra) { \ - emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \ - o0 << 15 | ra->encoding_with_zr() << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(madd_w, 0, 0b000, 0) - F(msub_w, 0, 0b000, 1) - F(madd, 1, 0b000, 0) - F(msub, 1, 0b000, 1) - - F(smaddl, 1, 0b001, 0) - F(smsubl, 1, 0b001, 1) - F(umaddl, 1, 0b101, 0) - F(umsubl, 1, 0b101, 1) -#undef F - - void mul(Register rd, Register rn, Register rm) { - madd(rd, rn, rm, ZR); - } - - void mul_w(Register rd, Register rn, Register rm) { - madd_w(rd, rn, rm, ZR); - } - -#define F(mnemonic, sf, op31, o0) \ - void mnemonic(Register rd, Register rn, Register rm) { \ - emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \ - o0 << 15 | 0b11111 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(smulh, 1, 0b010, 0) - F(umulh, 1, 0b110, 0) -#undef F - -#define F(mnemonic, op) \ - void mnemonic(Register rd, address addr) { \ - intx offset; \ - if (op == 0) { \ - offset = addr - pc(); \ - } else { \ - offset = (((intx)addr) - (((intx)pc()) & ~0xfff)) >> 12; \ - } \ - assert (is_imm_in_range(offset, 21, 0), "offset is out of range"); \ - emit_int32(op << 31 | (offset & 3) << 29 | 0b10000 << 24 | \ - encode_imm(offset >> 2, 19, 0, 5) | rd->encoding_with_zr()); \ - } \ - - F(adr, 0) - F(adrp, 1) -#undef F - - void adr(Register rd, Label& L) { - adr(rd, target(L)); - } - -#define F(mnemonic, sf, opc, N) \ - void mnemonic(Register rd, Register rn, int immr, int imms) { \ - assert ((immr >> (5 + sf)) == 0, "immr is out of range"); \ - assert ((imms >> (5 + sf)) == 0, "imms is out of range"); \ - emit_int32(sf << 31 | opc << 29 | 0b100110 << 23 | N << 22 | immr << 16 | \ - imms << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ - } - - F(sbfm_w, 0, 0b00, 0) - F(bfm_w, 0, 0b01, 0) - F(ubfm_w, 0, 0b10, 0) - - F(sbfm, 1, 0b00, 1) - F(bfm, 1, 0b01, 1) - F(ubfm, 1, 0b10, 1) -#undef F - -#define F(alias, mnemonic, sf, immr, imms) \ - void alias(Register rd, Register rn, int lsb, int width) { \ - assert ((lsb >> (5 + sf)) == 0, "lsb is out of range"); \ - assert ((1 <= width) && (width <= (32 << sf) - lsb), "width is out of range"); \ - mnemonic(rd, rn, immr, imms); \ - } - - F(bfi_w, bfm_w, 0, (-lsb) & 0x1f, width - 1) - F(bfi, bfm, 1, (-lsb) & 0x3f, width - 1) - F(bfxil_w, bfm_w, 0, lsb, lsb + width - 1) - F(bfxil, bfm, 1, lsb, lsb + width - 1) - F(sbfiz_w, sbfm_w, 0, (-lsb) & 0x1f, width - 1) - F(sbfiz, sbfm, 1, (-lsb) & 0x3f, width - 1) - F(sbfx_w, sbfm_w, 0, lsb, lsb + width - 1) - F(sbfx, sbfm, 1, lsb, lsb + width - 1) - F(ubfiz_w, ubfm_w, 0, (-lsb) & 0x1f, width - 1) - F(ubfiz, ubfm, 1, (-lsb) & 0x3f, width - 1) - F(ubfx_w, ubfm_w, 0, lsb, lsb + width - 1) - F(ubfx, ubfm, 1, lsb, lsb + width - 1) -#undef F - -#define F(alias, mnemonic, sf, immr, imms) \ - void alias(Register rd, Register rn, 
int shift) { \ - assert ((shift >> (5 + sf)) == 0, "shift is out of range"); \ - mnemonic(rd, rn, immr, imms); \ - } - - F(_asr_w, sbfm_w, 0, shift, 31) - F(_asr, sbfm, 1, shift, 63) - F(_lsl_w, ubfm_w, 0, (-shift) & 0x1f, 31 - shift) - F(_lsl, ubfm, 1, (-shift) & 0x3f, 63 - shift) - F(_lsr_w, ubfm_w, 0, shift, 31) - F(_lsr, ubfm, 1, shift, 63) -#undef F - -#define F(alias, mnemonic, immr, imms) \ - void alias(Register rd, Register rn) { \ - mnemonic(rd, rn, immr, imms); \ - } - - F(sxtb_w, sbfm_w, 0, 7) - F(sxtb, sbfm, 0, 7) - F(sxth_w, sbfm_w, 0, 15) - F(sxth, sbfm, 0, 15) - F(sxtw, sbfm, 0, 31) - F(uxtb_w, ubfm_w, 0, 7) - F(uxtb, ubfm, 0, 7) - F(uxth_w, ubfm_w, 0, 15) - F(uxth, ubfm, 0, 15) -#undef F - - // Branch instructions - -#define F(mnemonic, op) \ - void mnemonic(Register rn) { \ - emit_int32(0b1101011 << 25 | op << 21 | 0b11111 << 16 | rn->encoding_with_zr() << 5); \ - } - - F(br, 0b00) - F(blr, 0b01) - F(ret, 0b10) -#undef F - - void ret() { - ret(LR); - } - -#define F(mnemonic, op) \ - void mnemonic(address target) { \ - intx offset = target - pc(); \ - assert (is_offset_in_range(offset, 26), "offset is out of range"); \ - emit_int32(op << 31 | 0b00101 << 26 | encode_offset(offset, 26, 0)); \ - } - - F(b, 0) - F(bl, 1) -#undef F - - void b(address target, AsmCondition cond) { - if (cond == al) { - b(target); - } else { - intx offset = target - pc(); - assert (is_offset_in_range(offset, 19), "offset is out of range"); - emit_int32(0b0101010 << 25 | encode_offset(offset, 19, 5) | cond); - } - } - - -#define F(mnemonic, sf, op) \ - void mnemonic(Register rt, address target) { \ - intx offset = target - pc(); \ - assert (is_offset_in_range(offset, 19), "offset is out of range"); \ - emit_int32(sf << 31 | 0b011010 << 25 | op << 24 | encode_offset(offset, 19, 5) | rt->encoding_with_zr()); \ - } \ - - F(cbz_w, 0, 0) - F(cbnz_w, 0, 1) - F(cbz, 1, 0) - F(cbnz, 1, 1) -#undef F - -#define F(mnemonic, op) \ - void mnemonic(Register rt, int bit, address target) { \ - intx offset = target - pc(); \ - assert (is_offset_in_range(offset, 14), "offset is out of range"); \ - assert (0 <= bit && bit < 64, "bit number is out of range"); \ - emit_int32((bit >> 5) << 31 | 0b011011 << 25 | op << 24 | (bit & 0x1f) << 19 | \ - encode_offset(offset, 14, 5) | rt->encoding_with_zr()); \ - } \ - - F(tbz, 0) - F(tbnz, 1) -#undef F - - // System instructions - - enum DMB_Opt { - DMB_ld = 0b1101, - DMB_st = 0b1110, - DMB_all = 0b1111 - }; - -#define F(mnemonic, L, op0, op1, CRn, op2, Rt) \ - void mnemonic(DMB_Opt option) { \ - emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \ - CRn << 12 | option << 8 | op2 << 5 | Rt); \ - } - - F(dsb, 0, 0b00, 0b011, 0b0011, 0b100, 0b11111) - F(dmb, 0, 0b00, 0b011, 0b0011, 0b101, 0b11111) -#undef F - -#define F(mnemonic, L, op0, op1, CRn, Rt) \ - void mnemonic(int imm) { \ - assert ((imm >> 7) == 0, "immediate is out of range"); \ - emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \ - CRn << 12 | imm << 5 | Rt); \ - } - - F(hint, 0, 0b00, 0b011, 0b0010, 0b11111) -#undef F - - void nop() { - hint(0); - } - - void yield() { - hint(1); - } - -#define F(mnemonic, opc, op2, LL) \ - void mnemonic(int imm = 0) { \ - assert ((imm >> 16) == 0, "immediate is out of range"); \ - emit_int32(0b11010100 << 24 | opc << 21 | imm << 5 | op2 << 2 | LL); \ - } - - F(brk, 0b001, 0b000, 0b00) - F(hlt, 0b010, 0b000, 0b00) - F(dpcs1, 0b101, 0b000, 0b01) -#undef F - - enum SystemRegister { // o0<1> op1<3> CRn<4> CRm<4> op2<3> - SysReg_NZCV = 0b101101000010000, - 
SysReg_FPCR = 0b101101000100000, - }; - - void mrs(Register rt, SystemRegister systemReg) { - assert ((systemReg >> 15) == 0, "systemReg is out of range"); - emit_int32(0b110101010011 << 20 | systemReg << 5 | rt->encoding_with_zr()); - } - - void msr(SystemRegister systemReg, Register rt) { - assert ((systemReg >> 15) == 0, "systemReg is out of range"); - emit_int32(0b110101010001 << 20 | systemReg << 5 | rt->encoding_with_zr()); - } - - // Floating-point instructions - -#define F(mnemonic, M, S, type, opcode2) \ - void mnemonic(FloatRegister rn, FloatRegister rm) { \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rm->encoding() << 16 | 0b1000 << 10 | rn->encoding() << 5 | opcode2); \ - } - - F(fcmp_s, 0, 0, 0b00, 0b00000) - F(fcmpe_s, 0, 0, 0b00, 0b01000) - F(fcmp_d, 0, 0, 0b01, 0b00000) - F(fcmpe_d, 0, 0, 0b01, 0b10000) -#undef F - -#define F(mnemonic, M, S, type, opcode2) \ - void mnemonic(FloatRegister rn) { \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - 0b1000 << 10 | rn->encoding() << 5 | opcode2); \ - } - - F(fcmp0_s, 0, 0, 0b00, 0b01000) - F(fcmpe0_s, 0, 0, 0b00, 0b11000) - F(fcmp0_d, 0, 0, 0b01, 0b01000) - F(fcmpe0_d, 0, 0, 0b01, 0b11000) -#undef F - -#define F(mnemonic, M, S, type, op) \ - void mnemonic(FloatRegister rn, FloatRegister rm, int nzcv, AsmCondition cond) { \ - assert ((nzcv >> 4) == 0, "illegal nzcv"); \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rm->encoding() << 16 | cond << 12 | 0b01 << 10 | rn->encoding() << 5 | op << 4 | nzcv); \ - } - - F(fccmp_s, 0, 0, 0b00, 0) - F(fccmpe_s, 0, 0, 0b00, 1) - F(fccmp_d, 0, 0, 0b01, 0) - F(fccmpe_d, 0, 0, 0b01, 1) -#undef F - -#define F(mnemonic, M, S, type) \ - void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, AsmCondition cond) { \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rm->encoding() << 16 | cond << 12 | 0b11 << 10 | rn->encoding() << 5 | rd->encoding()); \ - } - - F(fcsel_s, 0, 0, 0b00) - F(fcsel_d, 0, 0, 0b01) -#undef F - -#define F(mnemonic, M, S, type, opcode) \ - void mnemonic(FloatRegister rd, FloatRegister rn) { \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - opcode << 15 | 0b10000 << 10 | rn->encoding() << 5 | rd->encoding()); \ - } - - F(fmov_s, 0, 0, 0b00, 0b000000) - F(fabs_s, 0, 0, 0b00, 0b000001) - F(fneg_s, 0, 0, 0b00, 0b000010) - F(fsqrt_s, 0, 0, 0b00, 0b000011) - F(fcvt_ds, 0, 0, 0b00, 0b000101) - F(fcvt_hs, 0, 0, 0b00, 0b000111) - F(frintn_s, 0, 0, 0b00, 0b001000) - F(frintp_s, 0, 0, 0b00, 0b001001) - F(frintm_s, 0, 0, 0b00, 0b001010) - F(frintz_s, 0, 0, 0b00, 0b001011) - F(frinta_s, 0, 0, 0b00, 0b001100) - F(frintx_s, 0, 0, 0b00, 0b001110) - F(frinti_s, 0, 0, 0b00, 0b001111) - - F(fmov_d, 0, 0, 0b01, 0b000000) - F(fabs_d, 0, 0, 0b01, 0b000001) - F(fneg_d, 0, 0, 0b01, 0b000010) - F(fsqrt_d, 0, 0, 0b01, 0b000011) - F(fcvt_sd, 0, 0, 0b01, 0b000100) - F(fcvt_hd, 0, 0, 0b01, 0b000111) - F(frintn_d, 0, 0, 0b01, 0b001000) - F(frintp_d, 0, 0, 0b01, 0b001001) - F(frintm_d, 0, 0, 0b01, 0b001010) - F(frintz_d, 0, 0, 0b01, 0b001011) - F(frinta_d, 0, 0, 0b01, 0b001100) - F(frintx_d, 0, 0, 0b01, 0b001110) - F(frinti_d, 0, 0, 0b01, 0b001111) - - F(fcvt_sh, 0, 0, 0b11, 0b000100) - F(fcvt_dh, 0, 0, 0b11, 0b000101) -#undef F - -#define F(mnemonic, M, S, type, opcode) \ - void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm) { \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rm->encoding() << 16 | opcode << 12 
| 0b10 << 10 | rn->encoding() << 5 | rd->encoding()); \ - } - - F(fmul_s, 0, 0, 0b00, 0b0000) - F(fdiv_s, 0, 0, 0b00, 0b0001) - F(fadd_s, 0, 0, 0b00, 0b0010) - F(fsub_s, 0, 0, 0b00, 0b0011) - F(fmax_s, 0, 0, 0b00, 0b0100) - F(fmin_s, 0, 0, 0b00, 0b0101) - F(fmaxnm_s, 0, 0, 0b00, 0b0110) - F(fminnm_s, 0, 0, 0b00, 0b0111) - F(fnmul_s, 0, 0, 0b00, 0b1000) - - F(fmul_d, 0, 0, 0b01, 0b0000) - F(fdiv_d, 0, 0, 0b01, 0b0001) - F(fadd_d, 0, 0, 0b01, 0b0010) - F(fsub_d, 0, 0, 0b01, 0b0011) - F(fmax_d, 0, 0, 0b01, 0b0100) - F(fmin_d, 0, 0, 0b01, 0b0101) - F(fmaxnm_d, 0, 0, 0b01, 0b0110) - F(fminnm_d, 0, 0, 0b01, 0b0111) - F(fnmul_d, 0, 0, 0b01, 0b1000) -#undef F - -#define F(mnemonic, M, S, type, o1, o0) \ - void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, FloatRegister ra) { \ - emit_int32(M << 31 | S << 29 | 0b11111 << 24 | type << 22 | o1 << 21 | rm->encoding() << 16 | \ - o0 << 15 | ra->encoding() << 10 | rn->encoding() << 5 | rd->encoding()); \ - } - - F(fmadd_s, 0, 0, 0b00, 0, 0) - F(fmsub_s, 0, 0, 0b00, 0, 1) - F(fnmadd_s, 0, 0, 0b00, 1, 0) - F(fnmsub_s, 0, 0, 0b00, 1, 1) - - F(fmadd_d, 0, 0, 0b01, 0, 0) - F(fmsub_d, 0, 0, 0b01, 0, 1) - F(fnmadd_d, 0, 0, 0b01, 1, 0) - F(fnmsub_d, 0, 0, 0b01, 1, 1) -#undef F - -#define F(mnemonic, M, S, type) \ - void mnemonic(FloatRegister rd, int imm8) { \ - assert ((imm8 >> 8) == 0, "immediate is out of range"); \ - emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - imm8 << 13 | 0b100 << 10 | rd->encoding()); \ - } - - F(fmov_s, 0, 0, 0b00) - F(fmov_d, 0, 0, 0b01) -#undef F - -#define F(mnemonic, sf, S, type, rmode, opcode) \ - void mnemonic(Register rd, FloatRegister rn) { \ - emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rmode << 19 | opcode << 16 | rn->encoding() << 5 | rd->encoding_with_zr()); \ - } - - F(fcvtns_ws, 0, 0, 0b00, 0b00, 0b000) - F(fcvtnu_ws, 0, 0, 0b00, 0b00, 0b001) - F(fcvtas_ws, 0, 0, 0b00, 0b00, 0b100) - F(fcvtau_ws, 0, 0, 0b00, 0b00, 0b101) - F(fmov_ws, 0, 0, 0b00, 0b00, 0b110) - F(fcvtps_ws, 0, 0, 0b00, 0b01, 0b000) - F(fcvtpu_ws, 0, 0, 0b00, 0b01, 0b001) - F(fcvtms_ws, 0, 0, 0b00, 0b10, 0b000) - F(fcvtmu_ws, 0, 0, 0b00, 0b10, 0b001) - F(fcvtzs_ws, 0, 0, 0b00, 0b11, 0b000) - F(fcvtzu_ws, 0, 0, 0b00, 0b11, 0b001) - - F(fcvtns_wd, 0, 0, 0b01, 0b00, 0b000) - F(fcvtnu_wd, 0, 0, 0b01, 0b00, 0b001) - F(fcvtas_wd, 0, 0, 0b01, 0b00, 0b100) - F(fcvtau_wd, 0, 0, 0b01, 0b00, 0b101) - F(fcvtps_wd, 0, 0, 0b01, 0b01, 0b000) - F(fcvtpu_wd, 0, 0, 0b01, 0b01, 0b001) - F(fcvtms_wd, 0, 0, 0b01, 0b10, 0b000) - F(fcvtmu_wd, 0, 0, 0b01, 0b10, 0b001) - F(fcvtzs_wd, 0, 0, 0b01, 0b11, 0b000) - F(fcvtzu_wd, 0, 0, 0b01, 0b11, 0b001) - - F(fcvtns_xs, 1, 0, 0b00, 0b00, 0b000) - F(fcvtnu_xs, 1, 0, 0b00, 0b00, 0b001) - F(fcvtas_xs, 1, 0, 0b00, 0b00, 0b100) - F(fcvtau_xs, 1, 0, 0b00, 0b00, 0b101) - F(fcvtps_xs, 1, 0, 0b00, 0b01, 0b000) - F(fcvtpu_xs, 1, 0, 0b00, 0b01, 0b001) - F(fcvtms_xs, 1, 0, 0b00, 0b10, 0b000) - F(fcvtmu_xs, 1, 0, 0b00, 0b10, 0b001) - F(fcvtzs_xs, 1, 0, 0b00, 0b11, 0b000) - F(fcvtzu_xs, 1, 0, 0b00, 0b11, 0b001) - - F(fcvtns_xd, 1, 0, 0b01, 0b00, 0b000) - F(fcvtnu_xd, 1, 0, 0b01, 0b00, 0b001) - F(fcvtas_xd, 1, 0, 0b01, 0b00, 0b100) - F(fcvtau_xd, 1, 0, 0b01, 0b00, 0b101) - F(fmov_xd, 1, 0, 0b01, 0b00, 0b110) - F(fcvtps_xd, 1, 0, 0b01, 0b01, 0b000) - F(fcvtpu_xd, 1, 0, 0b01, 0b01, 0b001) - F(fcvtms_xd, 1, 0, 0b01, 0b10, 0b000) - F(fcvtmu_xd, 1, 0, 0b01, 0b10, 0b001) - F(fcvtzs_xd, 1, 0, 0b01, 0b11, 0b000) - F(fcvtzu_xd, 1, 0, 0b01, 0b11, 0b001) - - F(fmov_xq, 1, 0, 0b10, 0b01, 
0b110) -#undef F - -#define F(mnemonic, sf, S, type, rmode, opcode) \ - void mnemonic(FloatRegister rd, Register rn) { \ - emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ - rmode << 19 | opcode << 16 | rn->encoding_with_zr() << 5 | rd->encoding()); \ - } - - F(scvtf_sw, 0, 0, 0b00, 0b00, 0b010) - F(ucvtf_sw, 0, 0, 0b00, 0b00, 0b011) - F(fmov_sw, 0, 0, 0b00, 0b00, 0b111) - F(scvtf_dw, 0, 0, 0b01, 0b00, 0b010) - F(ucvtf_dw, 0, 0, 0b01, 0b00, 0b011) - - F(scvtf_sx, 1, 0, 0b00, 0b00, 0b010) - F(ucvtf_sx, 1, 0, 0b00, 0b00, 0b011) - F(scvtf_dx, 1, 0, 0b01, 0b00, 0b010) - F(ucvtf_dx, 1, 0, 0b01, 0b00, 0b011) - F(fmov_dx, 1, 0, 0b01, 0b00, 0b111) - - F(fmov_qx, 1, 0, 0b10, 0b01, 0b111) -#undef F - -#define F(mnemonic, opcode) \ - void mnemonic(FloatRegister Vd, FloatRegister Vn) { \ - emit_int32( opcode << 10 | Vn->encoding() << 5 | Vd->encoding()); \ - } - - F(aese, 0b0100111000101000010010); - F(aesd, 0b0100111000101000010110); - F(aesmc, 0b0100111000101000011010); - F(aesimc, 0b0100111000101000011110); -#undef F - -#ifdef COMPILER2 - typedef VFP::double_num double_num; - typedef VFP::float_num float_num; -#endif - - void vcnt(FloatRegister Dd, FloatRegister Dn, int quad = 0, int size = 0) { - // emitted at VM startup to detect whether the instruction is available - assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction"); - assert(size == 0, "illegal size value"); - emit_int32(0x0e205800 | quad << 30 | size << 22 | Dn->encoding() << 5 | Dd->encoding()); - } - -#ifdef COMPILER2 - void addv(FloatRegister Dd, FloatRegister Dm, int quad, int size) { - // emitted at VM startup to detect whether the instruction is available - assert(VM_Version::has_simd(), "simd instruction"); - assert((quad & ~1) == 0, "illegal value"); - assert(size >= 0 && size < 3, "illegal value"); - assert(((size << 1) | quad) != 4, "illegal values (size 2, quad 0)"); - emit_int32(0x0e31b800 | quad << 30 | size << 22 | Dm->encoding() << 5 | Dd->encoding()); - } - - enum VElem_Size { - VELEM_SIZE_8 = 0x00, - VELEM_SIZE_16 = 0x01, - VELEM_SIZE_32 = 0x02, - VELEM_SIZE_64 = 0x03 - }; - - enum VLD_Type { - VLD1_TYPE_1_REG = 0b0111, - VLD1_TYPE_2_REGS = 0b1010, - VLD1_TYPE_3_REGS = 0b0110, - VLD1_TYPE_4_REGS = 0b0010 - }; - - enum VFloat_Arith_Size { - VFA_SIZE_F32 = 0b0, - VFA_SIZE_F64 = 0b1 - }; - -#define F(mnemonic, U, S, P) \ - void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ - int size, int quad) { \ - assert(VM_Version::has_simd(), "simd instruction"); \ - assert(!(size == VFA_SIZE_F64 && !quad), "reserved"); \ - assert((size & 1) == size, "overflow"); \ - emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | \ - S << 23 | size << 22 | 1 << 21 | P << 11 | 1 << 10 | \ - fm->encoding() << 16 | \ - fn->encoding() << 5 | \ - fd->encoding()); \ - } - - F(vaddF, 0, 0, 0b11010) // Vd = Vn + Vm (float) - F(vsubF, 0, 1, 0b11010) // Vd = Vn - Vm (float) - F(vmulF, 1, 0, 0b11011) // Vd = Vn - Vm (float) - F(vdivF, 1, 0, 0b11111) // Vd = Vn / Vm (float) -#undef F - -#define F(mnemonic, U) \ - void mnemonic(FloatRegister fd, FloatRegister fm, FloatRegister fn, \ - int size, int quad) { \ - assert(VM_Version::has_simd(), "simd instruction"); \ - assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \ - assert((size & 0b11) == size, "overflow"); \ - int R = 0; /* rounding */ \ - int S = 0; /* saturating */ \ - emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \ - 1 << 21 | R << 12 | S << 11 | 0b10001 << 10 | \ - fm->encoding() << 16 | \ - fn->encoding() << 5 | 
\ - fd->encoding()); \ - } - - F(vshlSI, 0) // Vd = ashift(Vn,Vm) (int) - F(vshlUI, 1) // Vd = lshift(Vn,Vm) (int) -#undef F - -#define F(mnemonic, U, P, M) \ - void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ - int size, int quad) { \ - assert(VM_Version::has_simd(), "simd instruction"); \ - assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \ - assert(!(size == VELEM_SIZE_64 && M), "reserved"); \ - assert((size & 0b11) == size, "overflow"); \ - emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \ - 1 << 21 | P << 11 | 1 << 10 | \ - fm->encoding() << 16 | \ - fn->encoding() << 5 | \ - fd->encoding()); \ - } - - F(vmulI, 0, 0b10011, true) // Vd = Vn * Vm (int) - F(vaddI, 0, 0b10000, false) // Vd = Vn + Vm (int) - F(vsubI, 1, 0b10000, false) // Vd = Vn - Vm (int) -#undef F - -#define F(mnemonic, U, O) \ - void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ - int quad) { \ - assert(VM_Version::has_simd(), "simd instruction"); \ - emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | O << 22 | \ - 1 << 21 | 0b00011 << 11 | 1 << 10 | \ - fm->encoding() << 16 | \ - fn->encoding() << 5 | \ - fd->encoding()); \ - } - - F(vandI, 0, 0b00) // Vd = Vn & Vm (int) - F(vorI, 0, 0b10) // Vd = Vn | Vm (int) - F(vxorI, 1, 0b00) // Vd = Vn ^ Vm (int) -#undef F - - void vnegI(FloatRegister fd, FloatRegister fn, int size, int quad) { - int U = 1; - assert(VM_Version::has_simd(), "simd instruction"); - assert(quad || size != VELEM_SIZE_64, "reserved"); - emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | - size << 22 | 0b100000101110 << 10 | - fn->encoding() << 5 | - fd->encoding() << 0); - } - - void vshli(FloatRegister fd, FloatRegister fn, int esize, int imm, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - - if (imm >= esize) { - // maximum shift gives all zeroes, direction doesn't matter, - // but only available for shift right - vshri(fd, fn, esize, esize, true /* unsigned */, quad); - return; - } - assert(imm >= 0 && imm < esize, "out of range"); - - int imm7 = esize + imm; - int immh = imm7 >> 3; - assert(immh != 0, "encoding constraint"); - assert((uint)immh < 16, "sanity"); - assert(((immh >> 2) | quad) != 0b10, "reserved"); - emit_int32(quad << 30 | 0b011110 << 23 | imm7 << 16 | - 0b010101 << 10 | fn->encoding() << 5 | fd->encoding() << 0); - } - - void vshri(FloatRegister fd, FloatRegister fn, int esize, int imm, - bool U /* unsigned */, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(imm > 0, "out of range"); - if (imm >= esize) { - // maximum shift (all zeroes) - imm = esize; - } - int imm7 = 2 * esize - imm ; - int immh = imm7 >> 3; - assert(immh != 0, "encoding constraint"); - assert((uint)immh < 16, "sanity"); - assert(((immh >> 2) | quad) != 0b10, "reserved"); - emit_int32(quad << 30 | U << 29 | 0b011110 << 23 | imm7 << 16 | - 0b000001 << 10 | fn->encoding() << 5 | fd->encoding() << 0); - } - void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { - vshri(fd, fm, size, imm, true /* unsigned */, quad); - } - void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { - vshri(fd, fm, size, imm, false /* signed */, quad); - } - - void vld1(FloatRegister Vt, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(addr.disp() == 0 || addr.disp() == 16, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 1; - int opcode = VLD1_TYPE_1_REG; - emit_int32(quad << 30 | 
0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vst1(FloatRegister Vt, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(addr.disp() == 0 || addr.disp() == 16, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 0; - int opcode = VLD1_TYPE_1_REG; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vld1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(Vt->successor() == Vt2, "Registers must be ordered"); - assert(addr.disp() == 0 || addr.disp() == 32, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 1; - int opcode = VLD1_TYPE_2_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vst1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(Vt->successor() == Vt2, "Registers must be ordered"); - assert(bits == 128, "unsupported"); - assert(addr.disp() == 0 || addr.disp() == 32, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 0; - int opcode = VLD1_TYPE_2_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, - Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, - "Registers must be ordered"); - assert(addr.disp() == 0 || addr.disp() == 48, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 1; - int opcode = VLD1_TYPE_3_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, - Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, - "Registers must be ordered"); - assert(addr.disp() == 0 || addr.disp() == 48, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 0; - int opcode = VLD1_TYPE_3_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, - FloatRegister Vt4, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(bits == 128, "unsupported"); - assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && - Vt3->successor() == Vt4, "Registers must be ordered"); - assert(addr.disp() == 0 || addr.disp() == 64, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 1; - int opcode = VLD1_TYPE_4_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, - FloatRegister Vt4, Address addr, VElem_Size size, int bits) { - assert(VM_Version::has_simd(), "simd instruction"); - 
assert(bits == 128, "unsupported"); - assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && - Vt3->successor() == Vt4, "Registers must be ordered"); - assert(addr.disp() == 0 || addr.disp() == 64, "must be"); - int type = 0b11; // 2D - int quad = 1; - int L = 0; - int opcode = VLD1_TYPE_4_REGS; - emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | - Vt->encoding() << 0 | addr.encoding_simd()); - } - - void rev32(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(size == VELEM_SIZE_8 || size == VELEM_SIZE_16, "must be"); - emit_int32(quad << 30 | 0b101110 << 24 | size << 22 | - 0b100000000010 << 10 | Vn->encoding() << 5 | Vd->encoding()); - } - - void eor(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(size == VELEM_SIZE_8, "must be"); - emit_int32(quad << 30 | 0b101110001 << 21 | Vm->encoding() << 16 | - 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding()); - } - - void orr(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(size == VELEM_SIZE_8, "must be"); - emit_int32(quad << 30 | 0b001110101 << 21 | Vm->encoding() << 16 | - 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding()); - } - - void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(imm8 >= 0 && imm8 < 256, "out of range"); - int op; - int cmode; - switch (size) { - case VELEM_SIZE_8: - op = 0; - cmode = 0b1110; - break; - case VELEM_SIZE_16: - op = 0; - cmode = 0b1000; - break; - case VELEM_SIZE_32: - op = 0; - cmode = 0b0000; - break; - default: - cmode = 0; - ShouldNotReachHere(); - } - int abc = imm8 >> 5; - int defgh = imm8 & 0b11111; - emit_int32(quad << 30 | op << 29 | 0b1111 << 24 | - abc << 16 | cmode << 12 | 0b01 << 10 | - defgh << 5 | Dd->encoding() << 0); - } - - void vdupI(FloatRegister Dd, Register Rn, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - assert(size <= 3, "unallocated encoding"); - assert(size != 3 || quad == 1, "reserved"); - int imm5 = 1 << size; -#ifdef ASSERT - switch (size) { - case VELEM_SIZE_8: - assert(imm5 == 0b00001, "sanity"); - break; - case VELEM_SIZE_16: - assert(imm5 == 0b00010, "sanity"); - break; - case VELEM_SIZE_32: - assert(imm5 == 0b00100, "sanity"); - break; - case VELEM_SIZE_64: - assert(imm5 == 0b01000, "sanity"); - break; - default: - ShouldNotReachHere(); - } -#endif - emit_int32(quad << 30 | 0b111 << 25 | 0b11 << 10 | - imm5 << 16 | Rn->encoding() << 5 | - Dd->encoding() << 0); - } - - void vdup(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) { - assert(VM_Version::has_simd(), "simd instruction"); - int index = 0; - int bytes = 1 << size; - int range = 16 / bytes; - assert(index < range, "overflow"); - - assert(size != VELEM_SIZE_64 || quad, "reserved"); - assert(8 << VELEM_SIZE_8 == 8, "sanity"); - assert(8 << VELEM_SIZE_16 == 16, "sanity"); - assert(8 << VELEM_SIZE_32 == 32, "sanity"); - assert(8 << VELEM_SIZE_64 == 64, "sanity"); - - int imm5 = (index << (size + 1)) | bytes; - - emit_int32(quad << 30 | 0b001110000 << 21 | imm5 << 16 | 0b000001 << 10 | - Vn->encoding() << 5 | Vd->encoding() << 0); - } - - void vdupF(FloatRegister Vd, FloatRegister Vn, int quad) { - vdup(Vd, Vn, VELEM_SIZE_32, quad); - } - - void vdupD(FloatRegister Vd, FloatRegister Vn, 
int quad) { - vdup(Vd, Vn, VELEM_SIZE_64, quad); - } -#endif -}; - - -#endif // CPU_ARM_VM_ASSEMBLER_ARM_64_HPP --- old/src/hotspot/cpu/arm/nativeInst_arm_64.cpp 2018-09-17 10:31:13.372897178 -0400 +++ /dev/null 2018-04-28 00:25:57.886812021 -0400 @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "assembler_arm.inline.hpp" -#include "code/codeCache.hpp" -#include "memory/resourceArea.hpp" -#include "nativeInst_arm.hpp" -#include "oops/compressedOops.inline.hpp" -#include "oops/klass.inline.hpp" -#include "oops/oop.hpp" -#include "runtime/handles.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/ostream.hpp" -#ifdef COMPILER1 -#include "c1/c1_Runtime1.hpp" -#endif - -void RawNativeInstruction::verify() { - // make sure code pattern is actually an instruction address - address addr = instruction_address(); - if (addr == NULL || ((intptr_t)addr & (instruction_size - 1)) != 0) { - fatal("not an instruction address"); - } -} - -void NativeMovRegMem::set_offset(int x) { - int scale = get_offset_scale(); - assert((x & right_n_bits(scale)) == 0, "offset should be aligned"); - guarantee((x >> 24) == 0, "encoding constraint"); - - if (Assembler::is_unsigned_imm_in_range(x, 12, scale)) { - set_unsigned_imm(x, 12, get_offset_scale(), 10); - return; - } - - // If offset is too large to be placed into single ldr/str instruction, we replace - // ldr/str Rt, [Rn, #offset] - // nop - // with - // add LR, Rn, #offset_hi - // ldr/str Rt, [LR, #offset_lo] - - // Note: Rtemp cannot be used as a temporary register as it could be used - // for value being stored (see LIR_Assembler::reg2mem). - // Patchable NativeMovRegMem instructions are generated in LIR_Assembler::mem2reg and LIR_Assembler::reg2mem - // which do not use LR, so it is free. Also, it does not conflict with LR usages in c1_LIRGenerator_arm.cpp. 
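For reference, the arithmetic behind the two-instruction rewrite described in the comment above is easy to check in isolation. The sketch below is standalone and illustrative only (the helper name and the sample offset are invented, not part of the deleted file): it splits a byte offset of at most 24 bits into the part carried by the add (placed in its LSL #12 immediate) and the twelve low bits left in the ldr/str imm12 field; scaling of the low immediate by the access size is handled separately in set_offset itself.

#include <cassert>
#include <cstdint>

// Split an offset (known to fit in 24 bits and to be suitably aligned) into
// the high part added via "add LR, Rn, #hi, LSL #12" and the low part kept
// in the ldr/str unsigned imm12 field.
static void split_offset(uint32_t offset, uint32_t* hi12, uint32_t* lo12) {
  assert((offset >> 24) == 0);   // mirrors the guarantee() above
  *lo12 = offset & 0xfff;        // stays in the load/store instruction
  *hi12 = offset >> 12;          // becomes the shifted add immediate
}

int main() {
  uint32_t hi = 0, lo = 0;
  split_offset(0x123450, &hi, &lo);          // hypothetical large offset
  assert(((hi << 12) | lo) == 0x123450);     // the two parts recombine exactly
  return 0;
}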
- const int tmp = LR->encoding(); - const int rn = (encoding() >> 5) & 0x1f; - - NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address()); - assert(next->is_nop(), "must be"); - - next->set_encoding((encoding() & 0xffc0001f) | Assembler::encode_unsigned_imm((x & 0xfff), 12, scale, 10) | tmp << 5); - this->set_encoding(0x91400000 | Assembler::encode_unsigned_imm((x >> 12), 12, 0, 10) | rn << 5 | tmp); -} - -intptr_t NativeMovConstReg::_data() const { -#ifdef COMPILER2 - if (is_movz()) { - // narrow constant or ic call cached value - RawNativeInstruction* ni = next_raw(); - assert(ni->is_movk(), "movz;movk expected"); - uint lo16 = (encoding() >> 5) & 0xffff; - intptr_t hi = 0; - int i = 0; - while (ni->is_movk() && i < 3) { - uint hi16 = (ni->encoding() >> 5) & 0xffff; - int shift = ((ni->encoding() >> 21) & 0x3) << 4; - hi |= (intptr_t)hi16 << shift; - ni = ni->next_raw(); - ++i; - } - return lo16 | hi; - } -#endif - return (intptr_t)(nativeLdrLiteral_at(instruction_address())->literal_value()); -} - -static void raw_set_data(RawNativeInstruction* si, intptr_t x, oop* oop_addr, Metadata** metadata_addr) { -#ifdef COMPILER2 - if (si->is_movz()) { - // narrow constant or ic call cached value - uintptr_t nx = 0; - int val_size = 32; - if (oop_addr != NULL) { - narrowOop encoded_oop = CompressedOops::encode(*oop_addr); - nx = encoded_oop; - } else if (metadata_addr != NULL) { - assert((*metadata_addr)->is_klass(), "expected Klass"); - narrowKlass encoded_k = Klass::encode_klass((Klass *)*metadata_addr); - nx = encoded_k; - } else { - nx = x; - val_size = 64; - } - RawNativeInstruction* ni = si->next_raw(); - uint lo16 = nx & 0xffff; - int shift = 16; - int imm16 = 0xffff << 5; - si->set_encoding((si->encoding() & ~imm16) | (lo16 << 5)); - while (shift < val_size) { - assert(ni->is_movk(), "movk expected"); - assert((((ni->encoding() >> 21) & 0x3) << 4) == shift, "wrong shift"); - uint hi16 = (nx >> shift) & 0xffff; - ni->set_encoding((ni->encoding() & ~imm16) | (hi16 << 5)); - shift += 16; - ni = ni->next_raw(); - } - return; - } -#endif - - assert(si->is_ldr_literal(), "should be"); - - if (oop_addr == NULL && metadata_addr == NULL) { - // A static ldr_literal without oop_relocation - nativeLdrLiteral_at(si->instruction_address())->set_literal_value((address)x); - } else { - // Oop is loaded from oops section - address addr = oop_addr != NULL ? 
(address)oop_addr : (address)metadata_addr; - int offset = addr - si->instruction_address(); - - assert((((intptr_t)addr) & 0x7) == 0, "target address should be aligned"); - assert((offset & 0x3) == 0, "offset should be aligned"); - - guarantee(Assembler::is_offset_in_range(offset, 19), "offset is not in range"); - nativeLdrLiteral_at(si->instruction_address())->set_literal_address(si->instruction_address() + offset); - } -} - -void NativeMovConstReg::set_data(intptr_t x) { - // Find and replace the oop corresponding to this instruction in oops section - oop* oop_addr = NULL; - Metadata** metadata_addr = NULL; - CodeBlob* cb = CodeCache::find_blob(instruction_address()); - { - nmethod* nm = cb->as_nmethod_or_null(); - if (nm != NULL) { - RelocIterator iter(nm, instruction_address(), next_raw()->instruction_address()); - while (iter.next()) { - if (iter.type() == relocInfo::oop_type) { - oop_addr = iter.oop_reloc()->oop_addr(); - *oop_addr = cast_to_oop(x); - break; - } else if (iter.type() == relocInfo::metadata_type) { - metadata_addr = iter.metadata_reloc()->metadata_addr(); - *metadata_addr = (Metadata*)x; - break; - } - } - } - } - raw_set_data(adjust(this), x, oop_addr, metadata_addr); -} - -void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { -} - -void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { - assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be"); - - NativeInstruction* instr = nativeInstruction_at(verified_entry); - assert(instr->is_nop() || instr->encoding() == zombie_illegal_instruction, "required for MT-safe patching"); - instr->set_encoding(zombie_illegal_instruction); -} - -void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { - assert (nativeInstruction_at(instr_addr)->is_b(), "MT-safe patching of arbitrary instructions is not allowed"); - assert (nativeInstruction_at(code_buffer)->is_nop(), "MT-safe patching of arbitrary instructions is not allowed"); - nativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer); -} - -void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { - // Insert at code_pos unconditional B instruction jumping to entry - intx offset = entry - code_pos; - assert (Assembler::is_offset_in_range(offset, 26), "offset is out of range"); - - NativeInstruction* instr = nativeInstruction_at(code_pos); - assert (instr->is_b() || instr->is_nop(), "MT-safe patching of arbitrary instructions is not allowed"); - - instr->set_encoding(0x5 << 26 | Assembler::encode_offset(offset, 26, 0)); -} - -static address call_for(address return_address) { - CodeBlob* cb = CodeCache::find_blob(return_address); - nmethod* nm = cb->as_nmethod_or_null(); - if (nm == NULL) { - ShouldNotReachHere(); - return NULL; - } - - // Look back 8 instructions (for LIR_Assembler::ic_call and MacroAssembler::patchable_call) - address begin = return_address - 8*NativeInstruction::instruction_size; - if (begin < nm->code_begin()) { - begin = nm->code_begin(); - } - RelocIterator iter(nm, begin, return_address); - while (iter.next()) { - Relocation* reloc = iter.reloc(); - if (reloc->is_call()) { - address call = reloc->addr(); - if (nativeInstruction_at(call)->is_call()) { - if (nativeCall_at(call)->return_address() == return_address) { - return call; - } - } - } - } - - return NULL; -} - -bool NativeCall::is_call_before(address return_address) { - return (call_for(return_address) != NULL); -} - -NativeCall* nativeCall_before(address 
return_address) { - assert(NativeCall::is_call_before(return_address), "must be"); - return nativeCall_at(call_for(return_address)); -} --- old/src/hotspot/cpu/arm/nativeInst_arm_64.hpp 2018-09-17 10:31:14.507963475 -0400 +++ /dev/null 2018-04-28 00:25:57.886812021 -0400 @@ -1,771 +0,0 @@ -/* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_ARM_VM_NATIVEINST_ARM_64_HPP -#define CPU_ARM_VM_NATIVEINST_ARM_64_HPP - -#include "asm/macroAssembler.hpp" -#include "code/codeCache.hpp" -#include "runtime/icache.hpp" -#include "runtime/os.hpp" - -// ------------------------------------------------------------------- - -// Some experimental projects extend the ARM back-end by implementing -// what the front-end usually assumes is a single native instruction -// with a sequence of instructions. -// -// The 'Raw' variants are the low level initial code (usually one -// instruction wide but some of them were already composed -// instructions). They should be used only by the back-end. -// -// The non-raw classes are the front-end entry point, hiding potential -// back-end extensions or the actual instructions size. 
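To make the layering described in the comment above concrete, a minimal standalone sketch follows (the class names, the two-word logical instruction, and the sample encodings are illustrative assumptions, not taken from the deleted header): the raw type reads the 32-bit word at its own address and steps by the fixed architectural instruction size, while the front-end type is what shared code works with and may advance over a whole multi-instruction sequence at once, using the same cast-an-address-to-an-instruction trick the deleted code relies on.

#include <cstdint>
#include <cstdio>

class RawInstr {                         // low-level view: one 4-byte instruction word
 public:
  uint32_t encoding() const { return *reinterpret_cast<const uint32_t*>(this); }
  const RawInstr* next_raw() const {
    return reinterpret_cast<const RawInstr*>(reinterpret_cast<const char*>(this) + 4);
  }
};

class Instr : public RawInstr {          // front-end view: hides the real sequence length
 public:
  const Instr* next() const {            // here one logical instruction is a movz/movk pair
    return static_cast<const Instr*>(next_raw()->next_raw());
  }
};

int main() {
  // movz x1, #0 ; movk x1, #1, lsl #16 ; nop  (encodings written out by hand)
  static const uint32_t code[] = { 0xd2800001, 0xf2a00021, 0xd503201f };
  const Instr* i = reinterpret_cast<const Instr*>(code);
  std::printf("0x%08x -> 0x%08x\n", i->encoding(), i->next()->encoding());
  return 0;
}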
-class NativeInstruction; - -class RawNativeInstruction { - public: - - enum ARM_specific { - instruction_size = Assembler::InstructionSize, - instruction_size_in_bits = instruction_size * BitsPerByte, - }; - - // illegal instruction used by NativeJump::patch_verified_entry - static const int zombie_illegal_instruction = 0xd4000542; // hvc #42 - - address addr_at(int offset) const { return (address)this + offset; } - address instruction_address() const { return addr_at(0); } - address next_raw_instruction_address() const { return addr_at(instruction_size); } - - static RawNativeInstruction* at(address address) { - return (RawNativeInstruction*)address; - } - - RawNativeInstruction* next_raw() const { - return at(next_raw_instruction_address()); - } - - int encoding() const { - return *(int*)this; - } - - void set_encoding(int value) { - int old = encoding(); - if (old != value) { - *(int*)this = value; - ICache::invalidate_word((address)this); - } - } - - bool is_nop() const { return encoding() == (int)0xd503201f; } - bool is_b() const { return (encoding() & 0xfc000000) == 0x14000000; } // unconditional branch - bool is_b_cond() const { return (encoding() & 0xff000010) == 0x54000000; } // conditional branch - bool is_bl() const { return (encoding() & 0xfc000000) == 0x94000000; } - bool is_br() const { return (encoding() & 0xfffffc1f) == 0xd61f0000; } - bool is_blr() const { return (encoding() & 0xfffffc1f) == 0xd63f0000; } - bool is_ldr_literal() const { return (encoding() & 0xff000000) == 0x58000000; } - bool is_adr_aligned() const { return (encoding() & 0xff000000) == 0x10000000; } // adr Xn,