--- old/common/autoconf/flags.m4 2016-12-02 11:14:25.324327569 -0500 +++ new/common/autoconf/flags.m4 2016-12-02 11:14:23.720236604 -0500 @@ -23,6 +23,101 @@ # questions. # +################################################################################ +# +# Setup ABI profile (for arm) +# +AC_DEFUN([FLAGS_SETUP_ABI_PROFILE], +[ + AC_ARG_WITH(abi-profile, [AS_HELP_STRING([--with-abi-profile], + [specify ABI profile for ARM builds (arm-vfp-sflt,arm-vfp-hflt,arm-sflt, armv5-vfp-sflt,armv6-vfp-hflt,arm64,aarch64) @<:@toolchain dependent@:>@ ])]) + + if test "x$with_abi_profile" != x; then + if test "x$OPENJDK_TARGET_CPU" != xarm && \ + test "x$OPENJDK_TARGET_CPU" != xaarch64; then + AC_MSG_ERROR([--with-abi-profile only available on arm/aarch64]) + fi + + OPENJDK_TARGET_ABI_PROFILE=$with_abi_profile + AC_MSG_CHECKING([for ABI profle]) + AC_MSG_RESULT([$OPENJDK_TARGET_ABI_PROFILE]) + + if test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-vfp-sflt; then + ARM_FLOAT_TYPE=vfp-sflt + ARM_ARCH_TYPE_FLAGS='-march=armv7-a -mthumb' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-vfp-hflt; then + ARM_FLOAT_TYPE=vfp-hflt + ARM_ARCH_TYPE_FLAGS='-march=armv7-a -mthumb' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-sflt; then + ARM_FLOAT_TYPE=sflt + ARM_ARCH_TYPE_FLAGS='-march=armv5t -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv5-vfp-sflt; then + ARM_FLOAT_TYPE=vfp-sflt + ARM_ARCH_TYPE_FLAGS='-march=armv5t -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv6-vfp-hflt; then + ARM_FLOAT_TYPE=vfp-hflt + ARM_ARCH_TYPE_FLAGS='-march=armv6 -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm64; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xaarch64; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= + else + AC_MSG_ERROR([Invalid ABI profile: "$OPENJDK_TARGET_ABI_PROFILE"]) + fi + 
+ if test "x$ARM_FLOAT_TYPE" = xvfp-sflt; then + ARM_FLOAT_TYPE_FLAGS='-mfloat-abi=softfp -mfpu=vfp -DFLOAT_ARCH=-vfp-sflt' + elif test "x$ARM_FLOAT_TYPE" = xvfp-hflt; then + ARM_FLOAT_TYPE_FLAGS='-mfloat-abi=hard -mfpu=vfp -DFLOAT_ARCH=-vfp-hflt' + elif test "x$ARM_FLOAT_TYPE" = xsflt; then + ARM_FLOAT_TYPE_FLAGS='-msoft-float -mfpu=vfp' + fi + AC_MSG_CHECKING([for $ARM_FLOAT_TYPE floating point flags]) + AC_MSG_RESULT([$ARM_FLOAT_TYPE_FLAGS]) + + AC_MSG_CHECKING([for arch type flags]) + AC_MSG_RESULT([$ARM_ARCH_TYPE_FLAGS]) + + # Now set JDK_ARCH_ABI_PROP_NAME. This is equivalent to the last part of the + # autoconf target triplet. + [ JDK_ARCH_ABI_PROP_NAME=`$ECHO $OPENJDK_TARGET_AUTOCONF_NAME | $SED -e 's/.*-\([^-]*\)$/\1/'` ] + # Sanity check that it is a known ABI. + if test "x$JDK_ARCH_ABI_PROP_NAME" != xgnu && \ + test "x$JDK_ARCH_ABI_PROP_NAME" != xgnueabi && \ + test "x$JDK_ARCH_ABI_PROP_NAME" != xgnueabihf; then + AC_MSG_WARN([Unknown autoconf target triplet ABI: "$JDK_ARCH_ABI_PROP_NAME"]) + fi + AC_MSG_CHECKING([for ABI property name]) + AC_MSG_RESULT([$JDK_ARCH_ABI_PROP_NAME]) + AC_SUBST(JDK_ARCH_ABI_PROP_NAME) + + # Pass these on to the open part of configure as if they were set using + # --with-extra-c[xx]flags. + EXTRA_CFLAGS="$EXTRA_CFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + # Get rid of annoying "note: the mangling of 'va_list' has changed in GCC 4.4" + # FIXME: This should not really be set using extra_cflags. + if test "x$OPENJDK_TARGET_CPU" = xarm; then + EXTRA_CFLAGS="$EXTRA_CFLAGS -Wno-psabi" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS -Wno-psabi" + fi + # Also add JDK_ARCH_ABI_PROP_NAME define, but only to CFLAGS. 
+ EXTRA_CFLAGS="$EXTRA_CFLAGS -DJDK_ARCH_ABI_PROP_NAME='\"\$(JDK_ARCH_ABI_PROP_NAME)\"'" + # And pass the architecture flags to the linker as well + EXTRA_LDFLAGS="$EXTRA_LDFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + fi + + # When building with an abi profile, the name of that profile is appended on the + # bundle platform, which is used in bundle names. + if test "x$OPENJDK_TARGET_ABI_PROFILE" != x; then + OPENJDK_TARGET_BUNDLE_PLATFORM="$OPENJDK_TARGET_OS_BUNDLE-$OPENJDK_TARGET_ABI_PROFILE" + fi +]) + # Reset the global CFLAGS/LDFLAGS variables and initialize them with the # corresponding configure arguments instead AC_DEFUN_ONCE([FLAGS_SETUP_USER_SUPPLIED_FLAGS], @@ -306,9 +401,17 @@ PICFLAG='-fPIC' SHARED_LIBRARY_FLAGS='-shared' SET_EXECUTABLE_ORIGIN='-Wl,-rpath,\$$ORIGIN[$]1' - SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN" SET_SHARED_LIBRARY_NAME='-Wl,-soname=[$]1' SET_SHARED_LIBRARY_MAPFILE='-Wl,-version-script=[$]1' + + # arm specific settings + if test "x$OPENJDK_TARGET_CPU_ARCH" = "xarm"; then + # '-Wl,-z,origin' isn't used on arm. 
+ SET_SHARED_LIBRARY_ORIGIN='-Wl,-rpath,\$$$$ORIGIN[$]1' + else + SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN" + fi + fi elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then PICFLAG="-KPIC" @@ -665,6 +768,7 @@ AC_DEFUN([FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK], [ + FLAGS_SETUP_ABI_PROFILE FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK_HELPER([TARGET]) FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK_HELPER([BUILD], [OPENJDK_BUILD_]) @@ -754,6 +858,7 @@ arm ) # on arm we don't prevent gcc to omit frame pointer but do prevent strict aliasing $2CFLAGS_JDK="${$2CFLAGS_JDK} -fno-strict-aliasing" + $2COMMON_CCXXFLAGS_JDK="${$2COMMON_CCXXFLAGS_JDK} -fsigned-char" ;; ppc ) # on ppc we don't prevent gcc to omit frame pointer but do prevent strict aliasing --- old/common/autoconf/generated-configure.sh 2016-12-02 11:14:33.332781714 -0500 +++ new/common/autoconf/generated-configure.sh 2016-12-02 11:14:31.620684624 -0500 @@ -749,6 +749,7 @@ CFLAGS_JDKLIB MACOSX_VERSION_MIN CXXSTD_CXXFLAG +JDK_ARCH_ABI_PROP_NAME CXX_O_FLAG_SIZE CXX_O_FLAG_NONE CXX_O_FLAG_DEBUG @@ -1139,6 +1140,7 @@ enable_debug with_debug_level with_jvm_variants +with_cpu_port with_devkit with_sys_root with_sysroot @@ -1188,6 +1190,7 @@ with_toolchain_version with_build_devkit with_jtreg +with_abi_profile enable_warnings_as_errors with_native_debug_symbols enable_debug_symbols @@ -2030,6 +2033,8 @@ --with-jvm-variants JVM variants (separated by commas) to build (server,client,minimal,core,zero,zeroshark,custom) [server] + --with-cpu-port specify sources to use for Hotspot 64-bit ARM port + (arm64,aarch64) [aarch64] --with-devkit use this devkit for compilers, tools and resources --with-sys-root alias for --with-sysroot for backwards compatability --with-sysroot use this directory as sysroot @@ -2112,6 +2117,10 @@ dependent] --with-build-devkit Devkit to use for the build platform toolchain --with-jtreg Regression Test Harness [probed] + --with-abi-profile specify ABI profile for ARM builds + 
(arm-vfp-sflt,arm-vfp-hflt,arm-sflt, + armv5-vfp-sflt,armv6-vfp-hflt,arm64,aarch64) + [toolchain dependent] --with-native-debug-symbols set the native debug symbol configuration (none, internal, external, zipped) [varying] @@ -3996,6 +4005,12 @@ # questions. # +################################################################################ +# +# Setup ABI profile (for arm) +# + + # Reset the global CFLAGS/LDFLAGS variables and initialize them with the # corresponding configure arguments instead @@ -4252,7 +4267,8 @@ # All valid JVM features, regardless of platform VALID_JVM_FEATURES="compiler1 compiler2 zero shark minimal dtrace jvmti jvmci \ - fprof vm-structs jni-check services management all-gcs nmt cds static-build" + fprof vm-structs jni-check services management all-gcs nmt cds \ + static-build link-time-opt" # All valid JVM variants VALID_JVM_VARIANTS="server client minimal core zero zeroshark custom" @@ -4307,6 +4323,16 @@ ################################################################################ +# +# Specify which sources will be used to build the 64-bit ARM port +# +# --with-cpu-port=arm64 will use hotspot/src/cpu/arm +# --with-cpu-port=aarch64 will use hotspot/src/cpu/aarch64 +# + + + +################################################################################ # Check if gtest should be built # @@ -5091,7 +5117,7 @@ #CUSTOM_AUTOCONF_INCLUDE # Do not change or remove the following line, it is needed for consistency checks: -DATE_WHEN_GENERATED=1479997584 +DATE_WHEN_GENERATED=1480631660 ############################################################################### # @@ -16708,6 +16734,28 @@ fi + + +# Check whether --with-cpu-port was given. +if test "${with_cpu_port+set}" = set; then : + withval=$with_cpu_port; +fi + + + if test "x$with_cpu_port" != x; then + if test "x$OPENJDK_TARGET_CPU" != xaarch64; then + as_fn_error $? 
"--with-cpu-port only available on aarch64" "$LINENO" 5 + fi + + if test "x$with_cpu_port" != x; then + if test "x$with_cpu_port" != xarm64 && \ + test "x$with_cpu_port" != xaarch64; then + as_fn_error $? "--with-cpu-port must specify arm64 or aarch64" "$LINENO" 5 + fi + fi + fi + + if test "x$with_jvm_variants" = x; then with_jvm_variants="server" fi @@ -49093,9 +49141,17 @@ PICFLAG='-fPIC' SHARED_LIBRARY_FLAGS='-shared' SET_EXECUTABLE_ORIGIN='-Wl,-rpath,\$$ORIGIN$1' - SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN" SET_SHARED_LIBRARY_NAME='-Wl,-soname=$1' SET_SHARED_LIBRARY_MAPFILE='-Wl,-version-script=$1' + + # arm specific settings + if test "x$OPENJDK_TARGET_CPU_ARCH" = "xarm"; then + # '-Wl,-z,origin' isn't used on arm. + SET_SHARED_LIBRARY_ORIGIN='-Wl,-rpath,\$$$$ORIGIN$1' + else + SET_SHARED_LIBRARY_ORIGIN="-Wl,-z,origin $SET_EXECUTABLE_ORIGIN" + fi + fi elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then PICFLAG="-KPIC" @@ -49681,6 +49737,108 @@ + +# Check whether --with-abi-profile was given. +if test "${with_abi_profile+set}" = set; then : + withval=$with_abi_profile; +fi + + + if test "x$with_abi_profile" != x; then + if test "x$OPENJDK_TARGET_CPU" != xarm && \ + test "x$OPENJDK_TARGET_CPU" != xaarch64; then + as_fn_error $? "--with-abi-profile only available on arm/aarch64" "$LINENO" 5 + fi + + OPENJDK_TARGET_ABI_PROFILE=$with_abi_profile + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ABI profle" >&5 +$as_echo_n "checking for ABI profle... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OPENJDK_TARGET_ABI_PROFILE" >&5 +$as_echo "$OPENJDK_TARGET_ABI_PROFILE" >&6; } + + if test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-vfp-sflt; then + ARM_FLOAT_TYPE=vfp-sflt + ARM_ARCH_TYPE_FLAGS='-march=armv7-a -mthumb' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-vfp-hflt; then + ARM_FLOAT_TYPE=vfp-hflt + ARM_ARCH_TYPE_FLAGS='-march=armv7-a -mthumb' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm-sflt; then + ARM_FLOAT_TYPE=sflt + ARM_ARCH_TYPE_FLAGS='-march=armv5t -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv5-vfp-sflt; then + ARM_FLOAT_TYPE=vfp-sflt + ARM_ARCH_TYPE_FLAGS='-march=armv5t -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarmv6-vfp-hflt; then + ARM_FLOAT_TYPE=vfp-hflt + ARM_ARCH_TYPE_FLAGS='-march=armv6 -marm' + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xarm64; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xaarch64; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= + else + as_fn_error $? "Invalid ABI profile: \"$OPENJDK_TARGET_ABI_PROFILE\"" "$LINENO" 5 + fi + + if test "x$ARM_FLOAT_TYPE" = xvfp-sflt; then + ARM_FLOAT_TYPE_FLAGS='-mfloat-abi=softfp -mfpu=vfp -DFLOAT_ARCH=-vfp-sflt' + elif test "x$ARM_FLOAT_TYPE" = xvfp-hflt; then + ARM_FLOAT_TYPE_FLAGS='-mfloat-abi=hard -mfpu=vfp -DFLOAT_ARCH=-vfp-hflt' + elif test "x$ARM_FLOAT_TYPE" = xsflt; then + ARM_FLOAT_TYPE_FLAGS='-msoft-float -mfpu=vfp' + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ARM_FLOAT_TYPE floating point flags" >&5 +$as_echo_n "checking for $ARM_FLOAT_TYPE floating point flags... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ARM_FLOAT_TYPE_FLAGS" >&5 +$as_echo "$ARM_FLOAT_TYPE_FLAGS" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for arch type flags" >&5 +$as_echo_n "checking for arch type flags... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ARM_ARCH_TYPE_FLAGS" >&5 +$as_echo "$ARM_ARCH_TYPE_FLAGS" >&6; } + + # Now set JDK_ARCH_ABI_PROP_NAME. This is equivalent to the last part of the + # autoconf target triplet. + JDK_ARCH_ABI_PROP_NAME=`$ECHO $OPENJDK_TARGET_AUTOCONF_NAME | $SED -e 's/.*-\([^-]*\)$/\1/'` + # Sanity check that it is a known ABI. + if test "x$JDK_ARCH_ABI_PROP_NAME" != xgnu && \ + test "x$JDK_ARCH_ABI_PROP_NAME" != xgnueabi && \ + test "x$JDK_ARCH_ABI_PROP_NAME" != xgnueabihf; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unknown autoconf target triplet ABI: \"$JDK_ARCH_ABI_PROP_NAME\"" >&5 +$as_echo "$as_me: WARNING: Unknown autoconf target triplet ABI: \"$JDK_ARCH_ABI_PROP_NAME\"" >&2;} + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ABI property name" >&5 +$as_echo_n "checking for ABI property name... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JDK_ARCH_ABI_PROP_NAME" >&5 +$as_echo "$JDK_ARCH_ABI_PROP_NAME" >&6; } + + + # Pass these on to the open part of configure as if they were set using + # --with-extra-c[xx]flags. + EXTRA_CFLAGS="$EXTRA_CFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + # Get rid of annoying "note: the mangling of 'va_list' has changed in GCC 4.4" + # FIXME: This should not really be set using extra_cflags. + if test "x$OPENJDK_TARGET_CPU" = xarm; then + EXTRA_CFLAGS="$EXTRA_CFLAGS -Wno-psabi" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS -Wno-psabi" + fi + # Also add JDK_ARCH_ABI_PROP_NAME define, but only to CFLAGS. 
+ EXTRA_CFLAGS="$EXTRA_CFLAGS -DJDK_ARCH_ABI_PROP_NAME='\"\$(JDK_ARCH_ABI_PROP_NAME)\"'" + # And pass the architecture flags to the linker as well + EXTRA_LDFLAGS="$EXTRA_LDFLAGS $ARM_ARCH_TYPE_FLAGS $ARM_FLOAT_TYPE_FLAGS" + fi + + # When building with an abi profile, the name of that profile is appended on the + # bundle platform, which is used in bundle names. + if test "x$OPENJDK_TARGET_ABI_PROFILE" != x; then + OPENJDK_TARGET_BUNDLE_PLATFORM="$OPENJDK_TARGET_OS_BUNDLE-$OPENJDK_TARGET_ABI_PROFILE" + fi + + # Special extras... if test "x$TOOLCHAIN_TYPE" = xsolstudio; then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xsparc"; then @@ -49832,6 +49990,7 @@ arm ) # on arm we don't prevent gcc to omit frame pointer but do prevent strict aliasing CFLAGS_JDK="${CFLAGS_JDK} -fno-strict-aliasing" + COMMON_CCXXFLAGS_JDK="${COMMON_CCXXFLAGS_JDK} -fsigned-char" ;; ppc ) # on ppc we don't prevent gcc to omit frame pointer but do prevent strict aliasing @@ -50655,6 +50814,7 @@ arm ) # on arm we don't prevent gcc to omit frame pointer but do prevent strict aliasing OPENJDK_BUILD_CFLAGS_JDK="${OPENJDK_BUILD_CFLAGS_JDK} -fno-strict-aliasing" + OPENJDK_BUILD_COMMON_CCXXFLAGS_JDK="${OPENJDK_BUILD_COMMON_CCXXFLAGS_JDK} -fsigned-char" ;; ppc ) # on ppc we don't prevent gcc to omit frame pointer but do prevent strict aliasing @@ -52823,6 +52983,19 @@ $as_echo "$JVM_FEATURES" >&6; } fi + # Override hotspot cpu definitions for ARM platforms + if test "x$OPENJDK_TARGET_CPU" = xarm; then + HOTSPOT_TARGET_CPU=arm_32 + HOTSPOT_TARGET_CPU_DEFINE="ARM32" + JVM_LDFLAGS="$JVM_LDFLAGS -fsigned-char" + JVM_CFLAGS="$JVM_CFLAGS -DARM -fsigned-char" + elif test "x$OPENJDK_TARGET_CPU" = xaarch64 && test "x$with_cpu_port" = xarm64; then + HOTSPOT_TARGET_CPU=arm_64 + HOTSPOT_TARGET_CPU_ARCH=arm + JVM_LDFLAGS="$JVM_LDFLAGS -fsigned-char" + JVM_CFLAGS="$JVM_CFLAGS -DARM -fsigned-char" + fi + # Verify that dependencies are met for explicitly set features. if [[ " $JVM_FEATURES " =~ " jvmti " ]] && ! 
[[ " $JVM_FEATURES " =~ " services " ]] ; then as_fn_error $? "Specified JVM feature 'jvmti' requires feature 'services'" "$LINENO" 5 @@ -52882,6 +53055,13 @@ JVM_FEATURES_jvmci="" fi + if test "x$OPENJDK_TARGET_CPU" = xarm ; then + # Default to use link time optimizations on minimal on arm + JVM_FEATURES_link_time_opt="link-time-opt" + else + JVM_FEATURES_link_time_opt="" + fi + # All variants but minimal (and custom) get these features NON_MINIMAL_FEATURES="$NON_MINIMAL_FEATURES jvmti fprof vm-structs jni-check services management all-gcs nmt cds" @@ -52889,7 +53069,7 @@ JVM_FEATURES_server="compiler1 compiler2 $NON_MINIMAL_FEATURES $JVM_FEATURES $JVM_FEATURES_jvmci" JVM_FEATURES_client="compiler1 $NON_MINIMAL_FEATURES $JVM_FEATURES $JVM_FEATURES_jvmci" JVM_FEATURES_core="$NON_MINIMAL_FEATURES $JVM_FEATURES" - JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES" + JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES $JVM_FEATURES_link_time_opt" JVM_FEATURES_zero="zero $NON_MINIMAL_FEATURES $JVM_FEATURES" JVM_FEATURES_zeroshark="zero shark $NON_MINIMAL_FEATURES $JVM_FEATURES" JVM_FEATURES_custom="$JVM_FEATURES" --- old/common/autoconf/hotspot.m4 2016-12-02 11:14:39.793148072 -0500 +++ new/common/autoconf/hotspot.m4 2016-12-02 11:14:38.217058694 -0500 @@ -25,7 +25,8 @@ # All valid JVM features, regardless of platform VALID_JVM_FEATURES="compiler1 compiler2 zero shark minimal dtrace jvmti jvmci \ - fprof vm-structs jni-check services management all-gcs nmt cds static-build" + fprof vm-structs jni-check services management all-gcs nmt cds \ + static-build link-time-opt" # All valid JVM variants VALID_JVM_VARIANTS="server client minimal core zero zeroshark custom" @@ -69,6 +70,8 @@ AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants], [JVM variants (separated by commas) to build (server,client,minimal,core,zero,zeroshark,custom) @<:@server@:>@])]) + SETUP_HOTSPOT_TARGET_CPU_PORT + if test "x$with_jvm_variants" = x; then with_jvm_variants="server" fi @@ 
-204,6 +207,19 @@ AC_MSG_RESULT([$JVM_FEATURES]) fi + # Override hotspot cpu definitions for ARM platforms + if test "x$OPENJDK_TARGET_CPU" = xarm; then + HOTSPOT_TARGET_CPU=arm_32 + HOTSPOT_TARGET_CPU_DEFINE="ARM32" + JVM_LDFLAGS="$JVM_LDFLAGS -fsigned-char" + JVM_CFLAGS="$JVM_CFLAGS -DARM -fsigned-char" + elif test "x$OPENJDK_TARGET_CPU" = xaarch64 && test "x$with_cpu_port" = xarm64; then + HOTSPOT_TARGET_CPU=arm_64 + HOTSPOT_TARGET_CPU_ARCH=arm + JVM_LDFLAGS="$JVM_LDFLAGS -fsigned-char" + JVM_CFLAGS="$JVM_CFLAGS -DARM -fsigned-char" + fi + # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) @@ -263,6 +279,13 @@ JVM_FEATURES_jvmci="" fi + if test "x$OPENJDK_TARGET_CPU" = xarm ; then + # Default to use link time optimizations on minimal on arm + JVM_FEATURES_link_time_opt="link-time-opt" + else + JVM_FEATURES_link_time_opt="" + fi + # All variants but minimal (and custom) get these features NON_MINIMAL_FEATURES="$NON_MINIMAL_FEATURES jvmti fprof vm-structs jni-check services management all-gcs nmt cds" @@ -270,7 +293,7 @@ JVM_FEATURES_server="compiler1 compiler2 $NON_MINIMAL_FEATURES $JVM_FEATURES $JVM_FEATURES_jvmci" JVM_FEATURES_client="compiler1 $NON_MINIMAL_FEATURES $JVM_FEATURES $JVM_FEATURES_jvmci" JVM_FEATURES_core="$NON_MINIMAL_FEATURES $JVM_FEATURES" - JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES" + JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES $JVM_FEATURES_link_time_opt" JVM_FEATURES_zero="zero $NON_MINIMAL_FEATURES $JVM_FEATURES" JVM_FEATURES_zeroshark="zero shark $NON_MINIMAL_FEATURES $JVM_FEATURES" JVM_FEATURES_custom="$JVM_FEATURES" @@ -320,6 +343,33 @@ ]) ################################################################################ +# +# Specify which sources will be used to build the 64-bit ARM port +# +# --with-cpu-port=arm64 will use hotspot/src/cpu/arm +# 
--with-cpu-port=aarch64 will use hotspot/src/cpu/aarch64 +# +AC_DEFUN([SETUP_HOTSPOT_TARGET_CPU_PORT], +[ + AC_ARG_WITH(cpu-port, [AS_HELP_STRING([--with-cpu-port], + [specify sources to use for Hotspot 64-bit ARM port (arm64,aarch64) @<:@aarch64@:>@ ])]) + + if test "x$with_cpu_port" != x; then + if test "x$OPENJDK_TARGET_CPU" != xaarch64; then + AC_MSG_ERROR([--with-cpu-port only available on aarch64]) + fi + + if test "x$with_cpu_port" != x; then + if test "x$with_cpu_port" != xarm64 && \ + test "x$with_cpu_port" != xaarch64; then + AC_MSG_ERROR([--with-cpu-port must specify arm64 or aarch64]) + fi + fi + fi +]) + + +################################################################################ # Check if gtest should be built # AC_DEFUN_ONCE([HOTSPOT_ENABLE_DISABLE_GTEST], --- old/hotspot/make/gensrc/GensrcAdlc.gmk 2016-12-02 11:14:45.541474048 -0500 +++ new/hotspot/make/gensrc/GensrcAdlc.gmk 2016-12-02 11:14:43.917381949 -0500 @@ -114,6 +114,10 @@ ADLCFLAGS += -U_LP64 endif + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm) + ADLCFLAGS += -DARM=1 + endif + ############################################################################## # Concatenate all ad source files into a single file, which will be fed to # adlc. Also include a #line directive at the start of every included file --- old/hotspot/make/lib/CompileJvm.gmk 2016-12-02 11:14:50.777770989 -0500 +++ new/hotspot/make/lib/CompileJvm.gmk 2016-12-02 11:14:49.217682519 -0500 @@ -139,6 +139,20 @@ ################################################################################ # Platform specific setup +# ARM source selection + +ifeq ($(OPENJDK_TARGET_OS)-$(OPENJDK_TARGET_CPU), linux-arm) + JVM_EXCLUDE_PATTERNS += arm_64 + +else ifeq ($(OPENJDK_TARGET_OS)-$(OPENJDK_TARGET_CPU), linux-aarch64) + # Open aarch64 port is named "aarch64", exclude it if the + # HOTSPOT_TARGET_CPU_ARCH is set to arm. In this case we + # want the hybrid sources. 
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), arm) + JVM_EXCLUDE_PATTERNS += arm_32 aarch64 + endif +endif + ifneq ($(filter $(OPENJDK_TARGET_OS), linux macosx windows), ) JVM_PRECOMPILED_HEADER := $(HOTSPOT_TOPDIR)/src/share/vm/precompiled/precompiled.hpp endif --- old/hotspot/make/lib/JvmFeatures.gmk 2016-12-02 11:14:55.950064299 -0500 +++ new/hotspot/make/lib/JvmFeatures.gmk 2016-12-02 11:14:54.317971746 -0500 @@ -146,3 +146,109 @@ memBaseline.cpp memReporter.cpp mallocTracker.cpp virtualMemoryTracker.cpp nmtCommon.cpp \ memTracker.cpp nmtDCmd.cpp mallocSiteTable.cpp endif + +################################################################################ + +ifeq ($(call check-jvm-feature, link-time-opt), true) + # NOTE: Disable automatic optimization level and let the explicit cflag control + # optimization level instead. This activates O3 on slowdebug builds, just + # like the old build, but it's probably not right. + JVM_OPTIMIZATION := + JVM_CFLAGS_FEATURES += -O3 -flto + JVM_LDFLAGS_FEATURES += -O3 -flto -fwhole-program -fno-strict-aliasing +endif + +ifeq ($(call check-jvm-feature, minimal), true) + ifeq ($(call check-jvm-feature, link-time-opt), false) + JVM_OPTIMIZATION := SIZE + OPT_SPEED_SRC := \ + allocation.cpp \ + assembler.cpp \ + assembler_linux_arm.cpp \ + barrierSet.cpp \ + basicLock.cpp \ + biasedLocking.cpp \ + bytecode.cpp \ + bytecodeInterpreter.cpp \ + bytecodeInterpreter_x86.cpp \ + c1_Compilation.cpp \ + c1_Compiler.cpp \ + c1_GraphBuilder.cpp \ + c1_LinearScan.cpp \ + c1_LIR.cpp \ + ciEnv.cpp \ + ciObjectFactory.cpp \ + codeBlob.cpp \ + constantPool.cpp \ + constMethod.cpp \ + classLoader.cpp \ + classLoaderData.cpp \ + classFileParser.cpp \ + classFileStream.cpp \ + cpCache.cpp \ + defNewGeneration.cpp \ + frame_arm.cpp \ + genCollectedHeap.cpp \ + generation.cpp \ + genMarkSweep.cpp \ + growableArray.cpp \ + handles.cpp \ + hashtable.cpp \ + heap.cpp \ + icache.cpp \ + icache_arm.cpp \ + instanceKlass.cpp \ + invocationCounter.cpp \ + iterator.cpp 
\ + javaCalls.cpp \ + javaClasses.cpp \ + jniFastGetField_arm.cpp \ + jvm.cpp \ + jvm_linux.cpp \ + linkResolver.cpp \ + klass.cpp \ + klassVtable.cpp \ + markSweep.cpp \ + memRegion.cpp \ + memoryPool.cpp \ + method.cpp \ + methodHandles.cpp \ + methodHandles_arm.cpp \ + methodLiveness.cpp \ + metablock.cpp \ + metaspace.cpp \ + mutex.cpp \ + mutex_linux.cpp \ + mutexLocker.cpp \ + nativeLookup.cpp \ + objArrayKlass.cpp \ + os_linux.cpp \ + os_linux_arm.cpp \ + placeHolders.cpp \ + quickSort.cpp \ + resourceArea.cpp \ + rewriter.cpp \ + sharedRuntime.cpp \ + signature.cpp \ + space.cpp \ + stackMapTable.cpp \ + symbolTable.cpp \ + systemDictionary.cpp \ + symbol.cpp \ + synchronizer.cpp \ + threadLS_bsd_x86.cpp \ + threadLS_linux_arm.cpp \ + threadLS_linux_x86.cpp \ + timer.cpp \ + typeArrayKlass.cpp \ + unsafe.cpp \ + utf8.cpp \ + vmSymbols.cpp \ + # + + $(foreach s, $(OPT_SPEED_SRC), \ + $(eval BUILD_LIBJVM_$s_OPTIMIZATION := HIGHEST_JVM)) + + BUILD_LIBJVM_systemDictionary.cpp_CXXFLAGS := -fno-optimize-sibling-calls + endif +endif --- old/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2016-12-02 11:15:01.206362373 -0500 +++ new/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h 2016-12-02 11:14:59.610271862 -0500 @@ -80,6 +80,12 @@ #include #endif +#if defined(arm) || defined(arm64) +struct user_regs_struct { + unsigned long regs[ELF_NGREG]; /* integer and fp regs */ +}; +#endif + // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms --- old/hotspot/src/share/vm/c1/c1_Runtime1.cpp 2016-12-02 11:15:06.514663396 -0500 +++ new/hotspot/src/share/vm/c1/c1_Runtime1.cpp 2016-12-02 11:15:04.906572205 -0500 @@ -33,7 +33,6 @@ #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeBlob.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/compiledIC.hpp" #include "code/pcDesc.hpp" #include "code/scopeDesc.hpp" 
@@ -189,52 +188,44 @@ int frame_size; bool must_gc_arguments; - if (!CodeCacheExtensions::skip_compiler_support()) { - // bypass useless code generation - Compilation::setup_code_buffer(&code, 0); - - // create assembler for code generation - StubAssembler* sasm = new StubAssembler(&code, name_for(id), id); - // generate code for runtime stub - oop_maps = generate_code_for(id, sasm); - assert(oop_maps == NULL || sasm->frame_size() != no_frame_size, - "if stub has an oop map it must have a valid frame size"); + Compilation::setup_code_buffer(&code, 0); + + // create assembler for code generation + StubAssembler* sasm = new StubAssembler(&code, name_for(id), id); + // generate code for runtime stub + oop_maps = generate_code_for(id, sasm); + assert(oop_maps == NULL || sasm->frame_size() != no_frame_size, + "if stub has an oop map it must have a valid frame size"); #ifdef ASSERT - // Make sure that stubs that need oopmaps have them - switch (id) { - // These stubs don't need to have an oopmap - case dtrace_object_alloc_id: - case g1_pre_barrier_slow_id: - case g1_post_barrier_slow_id: - case slow_subtype_check_id: - case fpu2long_stub_id: - case unwind_exception_id: - case counter_overflow_id: + // Make sure that stubs that need oopmaps have them + switch (id) { + // These stubs don't need to have an oopmap + case dtrace_object_alloc_id: + case g1_pre_barrier_slow_id: + case g1_post_barrier_slow_id: + case slow_subtype_check_id: + case fpu2long_stub_id: + case unwind_exception_id: + case counter_overflow_id: #if defined(SPARC) || defined(PPC32) - case handle_exception_nofpu_id: // Unused on sparc + case handle_exception_nofpu_id: // Unused on sparc #endif - break; + break; - // All other stubs should have oopmaps - default: - assert(oop_maps != NULL, "must have an oopmap"); - } + // All other stubs should have oopmaps + default: + assert(oop_maps != NULL, "must have an oopmap"); + } #endif - // align so printing shows nop's instead of random code at the end 
(SimpleStubs are aligned) - sasm->align(BytesPerWord); - // make sure all code is in code buffer - sasm->flush(); - - frame_size = sasm->frame_size(); - must_gc_arguments = sasm->must_gc_arguments(); - } else { - /* ignored values */ - oop_maps = NULL; - frame_size = 0; - must_gc_arguments = false; - } + // align so printing shows nop's instead of random code at the end (SimpleStubs are aligned) + sasm->align(BytesPerWord); + // make sure all code is in code buffer + sasm->flush(); + + frame_size = sasm->frame_size(); + must_gc_arguments = sasm->must_gc_arguments(); // create blob - distinguish a few special cases CodeBlob* blob = RuntimeStub::new_runtime_stub(name_for(id), &code, --- old/hotspot/src/share/vm/code/codeBlob.cpp 2016-12-02 11:15:11.674956025 -0500 +++ new/hotspot/src/share/vm/code/codeBlob.cpp 2016-12-02 11:15:10.106867101 -0500 @@ -25,7 +25,6 @@ #include "precompiled.hpp" #include "code/codeBlob.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/relocInfo.hpp" #include "compiler/disassembler.hpp" #include "interpreter/bytecode.hpp" @@ -228,7 +227,6 @@ BufferBlob* blob = NULL; unsigned int size = sizeof(BufferBlob); - CodeCacheExtensions::size_blob(name, &buffer_size); // align the size to CodeEntryAlignment size = CodeBlob::align_code_offset(size); size += round_to(buffer_size, oopSize); @@ -312,7 +310,6 @@ MethodHandlesAdapterBlob* blob = NULL; unsigned int size = sizeof(MethodHandlesAdapterBlob); - CodeCacheExtensions::size_blob("MethodHandles adapters", &buffer_size); // align the size to CodeEntryAlignment size = CodeBlob::align_code_offset(size); size += round_to(buffer_size, oopSize); @@ -354,13 +351,11 @@ { RuntimeStub* stub = NULL; ThreadInVMfromUnknown __tiv; // get to VM state in case we block on CodeCache_lock - if (!CodeCacheExtensions::skip_code_generation()) { - // bypass useless code generation + { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); unsigned int size = 
CodeBlob::allocation_size(cb, sizeof(RuntimeStub)); stub = new (size) RuntimeStub(stub_name, cb, size, frame_complete, frame_size, oop_maps, caller_must_gc_arguments); } - stub = (RuntimeStub*) CodeCacheExtensions::handle_generated_blob(stub, stub_name); trace_new_stub(stub, "RuntimeStub - ", stub_name); --- old/hotspot/src/share/vm/code/codeBlob.hpp 2016-12-02 11:15:16.959255688 -0500 +++ new/hotspot/src/share/vm/code/codeBlob.hpp 2016-12-02 11:15:15.143152699 -0500 @@ -40,8 +40,7 @@ MethodProfiled = 1, // Execution level 2 and 3 (profiled) nmethods NonNMethod = 2, // Non-nmethods like Buffers, Adapters and Runtime Stubs All = 3, // All types (No code cache segmentation) - Pregenerated = 4, // Special blobs, managed by CodeCacheExtensions - NumTypes = 5 // Number of CodeBlobTypes + NumTypes = 4 // Number of CodeBlobTypes }; }; --- old/hotspot/src/share/vm/code/stubs.cpp 2016-12-02 11:15:22.183551948 -0500 +++ new/hotspot/src/share/vm/code/stubs.cpp 2016-12-02 11:15:20.579460980 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -262,16 +262,3 @@ } } -// Fixup for pregenerated code -void StubQueue::fix_buffer(address buffer, address queue_end, address buffer_end, int number_of_stubs) { - const int extra_bytes = CodeEntryAlignment; - _stub_buffer = buffer; - _queue_begin = 0; - _queue_end = queue_end - buffer; - _number_of_stubs = number_of_stubs; - int size = buffer_end - buffer; - // Note: _buffer_limit must differ from _queue_end in the iteration loops - // => add extra space at the end (preserving alignment for asserts) if needed - if (buffer_end == queue_end) size += extra_bytes; - _buffer_limit = _buffer_size = size; -} --- old/hotspot/src/share/vm/code/stubs.hpp 2016-12-02 11:15:27.263840039 -0500 +++ new/hotspot/src/share/vm/code/stubs.hpp 2016-12-02 11:15:25.655748848 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -217,8 +217,6 @@ void verify(); // verifies the stub queue void print(); // prints information about the stub queue - // Fixup for pregenerated code - void fix_buffer(address buffer, address queue_end, address buffer_end, int number_of_stubs); }; #endif // SHARE_VM_CODE_STUBS_HPP --- old/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp 2016-12-02 11:15:34.352242006 -0500 +++ new/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp 2016-12-02 11:15:32.736150362 -0500 @@ -27,7 +27,6 @@ #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "compiler/compileBroker.hpp" #include "compiler/disassembler.hpp" #include "gc/shared/collectedHeap.hpp" @@ -1199,7 +1198,6 @@ ICache::invalidate_range(handler, insts_size); _handler = handler + insts_size; } - CodeCacheExtensions::handle_generated_handler(handler, buffer->name(), _handler); return handler; } @@ -1208,7 +1206,7 @@ // use slow signature handler if we can't do better int handler_index = -1; // check if we can use customized (fast) signature handler - if (UseFastSignatureHandlers && CodeCacheExtensions::support_fast_signature_handlers() && method->size_of_parameters() <= Fingerprinter::max_size_of_parameters) { + if (UseFastSignatureHandlers && method->size_of_parameters() <= Fingerprinter::max_size_of_parameters) { // use customized signature handler MutexLocker mu(SignatureHandlerLibrary_lock); // make sure data structure is initialized @@ -1225,15 +1223,6 @@ round_to((intptr_t)_buffer, CodeEntryAlignment) - (address)_buffer; CodeBuffer buffer((address)(_buffer + align_offset), SignatureHandlerLibrary::buffer_size - align_offset); - if (!CodeCacheExtensions::support_dynamic_code()) { - // we need a name for the signature (for lookups or saving) - const int SYMBOL_SIZE = 50; - char *symbolName = NEW_RESOURCE_ARRAY(char, 
SYMBOL_SIZE); - // support for named signatures - jio_snprintf(symbolName, SYMBOL_SIZE, - "native_" UINT64_FORMAT, fingerprint); - buffer.set_name(symbolName); - } InterpreterRuntime::SignatureHandlerGenerator(method, &buffer).generate(fingerprint); // copy into code heap address handler = set_handler(&buffer); --- old/hotspot/src/share/vm/interpreter/templateInterpreter.cpp 2016-12-02 11:15:39.876555277 -0500 +++ new/hotspot/src/share/vm/interpreter/templateInterpreter.cpp 2016-12-02 11:15:38.024450250 -0500 @@ -23,7 +23,6 @@ */ #include "precompiled.hpp" -#include "code/codeCacheExtensions.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "interpreter/interp_masm.hpp" @@ -52,29 +51,10 @@ TraceTime timer("Interpreter generation", TRACETIME_LOG(Info, startuptime)); int code_size = InterpreterCodeSize; NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space -#if INCLUDE_JVMTI - if (CodeCacheExtensions::saving_generated_interpreter()) { - // May requires several versions of the codelets. - // Final size will automatically be optimized. 
- code_size *= 2; - } -#endif _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL, "Interpreter"); TemplateInterpreterGenerator g(_code); } - if (PrintInterpreter) { - if (CodeCacheExtensions::saving_generated_interpreter() && - CodeCacheExtensions::use_pregenerated_interpreter()) { - ResourceMark rm; - tty->print("Printing the newly generated interpreter first"); - print(); - tty->print("Printing the pregenerated interpreter next"); - } - } - - // Install the pregenerated interpreter code before printing it - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::TemplateInterpreter); if (PrintInterpreter) { ResourceMark rm; --- old/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp 2016-12-02 11:15:45.384867643 -0500 +++ new/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp 2016-12-02 11:15:43.516761705 -0500 @@ -23,7 +23,6 @@ */ #include "precompiled.hpp" -#include "code/codeCacheExtensions.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "interpreter/interp_masm.hpp" @@ -55,219 +54,213 @@ }; void TemplateInterpreterGenerator::generate_all() { - // Loop, in case we need several variants of the interpreter entries - do { - if (!CodeCacheExtensions::skip_code_generation()) { - // bypass code generation when useless - { CodeletMark cm(_masm, "slow signature handler"); - AbstractInterpreter::_slow_signature_handler = generate_slow_signature_handler(); - } + { CodeletMark cm(_masm, "slow signature handler"); + AbstractInterpreter::_slow_signature_handler = generate_slow_signature_handler(); + } - { CodeletMark cm(_masm, "error exits"); - _unimplemented_bytecode = generate_error_exit("unimplemented bytecode"); - _illegal_bytecode_sequence = generate_error_exit("illegal bytecode sequence - method not verified"); - } + { CodeletMark cm(_masm, "error exits"); + _unimplemented_bytecode = generate_error_exit("unimplemented bytecode"); + _illegal_bytecode_sequence = 
generate_error_exit("illegal bytecode sequence - method not verified"); + } #ifndef PRODUCT - if (TraceBytecodes) { - CodeletMark cm(_masm, "bytecode tracing support"); - Interpreter::_trace_code = - EntryPoint( - generate_trace_code(btos), - generate_trace_code(ztos), - generate_trace_code(ctos), - generate_trace_code(stos), - generate_trace_code(atos), - generate_trace_code(itos), - generate_trace_code(ltos), - generate_trace_code(ftos), - generate_trace_code(dtos), - generate_trace_code(vtos) - ); - } + if (TraceBytecodes) { + CodeletMark cm(_masm, "bytecode tracing support"); + Interpreter::_trace_code = + EntryPoint( + generate_trace_code(btos), + generate_trace_code(ztos), + generate_trace_code(ctos), + generate_trace_code(stos), + generate_trace_code(atos), + generate_trace_code(itos), + generate_trace_code(ltos), + generate_trace_code(ftos), + generate_trace_code(dtos), + generate_trace_code(vtos) + ); + } #endif // !PRODUCT - { CodeletMark cm(_masm, "return entry points"); - const int index_size = sizeof(u2); - for (int i = 0; i < Interpreter::number_of_return_entries; i++) { - Interpreter::_return_entry[i] = - EntryPoint( - generate_return_entry_for(itos, i, index_size), - generate_return_entry_for(itos, i, index_size), - generate_return_entry_for(itos, i, index_size), - generate_return_entry_for(itos, i, index_size), - generate_return_entry_for(atos, i, index_size), - generate_return_entry_for(itos, i, index_size), - generate_return_entry_for(ltos, i, index_size), - generate_return_entry_for(ftos, i, index_size), - generate_return_entry_for(dtos, i, index_size), - generate_return_entry_for(vtos, i, index_size) - ); - } - } + { CodeletMark cm(_masm, "return entry points"); + const int index_size = sizeof(u2); + for (int i = 0; i < Interpreter::number_of_return_entries; i++) { + Interpreter::_return_entry[i] = + EntryPoint( + generate_return_entry_for(itos, i, index_size), + generate_return_entry_for(itos, i, index_size), + generate_return_entry_for(itos, 
i, index_size), + generate_return_entry_for(itos, i, index_size), + generate_return_entry_for(atos, i, index_size), + generate_return_entry_for(itos, i, index_size), + generate_return_entry_for(ltos, i, index_size), + generate_return_entry_for(ftos, i, index_size), + generate_return_entry_for(dtos, i, index_size), + generate_return_entry_for(vtos, i, index_size) + ); + } + } - { CodeletMark cm(_masm, "invoke return entry points"); - // These states are in order specified in TosState, except btos/ztos/ctos/stos are - // really the same as itos since there is no top of stack optimization for these types - const TosState states[] = {itos, itos, itos, itos, itos, ltos, ftos, dtos, atos, vtos, ilgl}; - const int invoke_length = Bytecodes::length_for(Bytecodes::_invokestatic); - const int invokeinterface_length = Bytecodes::length_for(Bytecodes::_invokeinterface); - const int invokedynamic_length = Bytecodes::length_for(Bytecodes::_invokedynamic); - - for (int i = 0; i < Interpreter::number_of_return_addrs; i++) { - TosState state = states[i]; - assert(state != ilgl, "states array is wrong above"); - Interpreter::_invoke_return_entry[i] = generate_return_entry_for(state, invoke_length, sizeof(u2)); - Interpreter::_invokeinterface_return_entry[i] = generate_return_entry_for(state, invokeinterface_length, sizeof(u2)); - Interpreter::_invokedynamic_return_entry[i] = generate_return_entry_for(state, invokedynamic_length, sizeof(u4)); - } - } + { CodeletMark cm(_masm, "invoke return entry points"); + // These states are in order specified in TosState, except btos/ztos/ctos/stos are + // really the same as itos since there is no top of stack optimization for these types + const TosState states[] = {itos, itos, itos, itos, itos, ltos, ftos, dtos, atos, vtos, ilgl}; + const int invoke_length = Bytecodes::length_for(Bytecodes::_invokestatic); + const int invokeinterface_length = Bytecodes::length_for(Bytecodes::_invokeinterface); + const int invokedynamic_length = 
Bytecodes::length_for(Bytecodes::_invokedynamic); + + for (int i = 0; i < Interpreter::number_of_return_addrs; i++) { + TosState state = states[i]; + assert(state != ilgl, "states array is wrong above"); + Interpreter::_invoke_return_entry[i] = generate_return_entry_for(state, invoke_length, sizeof(u2)); + Interpreter::_invokeinterface_return_entry[i] = generate_return_entry_for(state, invokeinterface_length, sizeof(u2)); + Interpreter::_invokedynamic_return_entry[i] = generate_return_entry_for(state, invokedynamic_length, sizeof(u4)); + } + } - { CodeletMark cm(_masm, "earlyret entry points"); - Interpreter::_earlyret_entry = - EntryPoint( - generate_earlyret_entry_for(btos), - generate_earlyret_entry_for(ztos), - generate_earlyret_entry_for(ctos), - generate_earlyret_entry_for(stos), - generate_earlyret_entry_for(atos), - generate_earlyret_entry_for(itos), - generate_earlyret_entry_for(ltos), - generate_earlyret_entry_for(ftos), - generate_earlyret_entry_for(dtos), - generate_earlyret_entry_for(vtos) - ); - } + { CodeletMark cm(_masm, "earlyret entry points"); + Interpreter::_earlyret_entry = + EntryPoint( + generate_earlyret_entry_for(btos), + generate_earlyret_entry_for(ztos), + generate_earlyret_entry_for(ctos), + generate_earlyret_entry_for(stos), + generate_earlyret_entry_for(atos), + generate_earlyret_entry_for(itos), + generate_earlyret_entry_for(ltos), + generate_earlyret_entry_for(ftos), + generate_earlyret_entry_for(dtos), + generate_earlyret_entry_for(vtos) + ); + } - { CodeletMark cm(_masm, "deoptimization entry points"); - for (int i = 0; i < Interpreter::number_of_deopt_entries; i++) { - Interpreter::_deopt_entry[i] = - EntryPoint( - generate_deopt_entry_for(itos, i), - generate_deopt_entry_for(itos, i), - generate_deopt_entry_for(itos, i), - generate_deopt_entry_for(itos, i), - generate_deopt_entry_for(atos, i), - generate_deopt_entry_for(itos, i), - generate_deopt_entry_for(ltos, i), - generate_deopt_entry_for(ftos, i), - 
generate_deopt_entry_for(dtos, i), - generate_deopt_entry_for(vtos, i) - ); - } - } + { CodeletMark cm(_masm, "deoptimization entry points"); + for (int i = 0; i < Interpreter::number_of_deopt_entries; i++) { + Interpreter::_deopt_entry[i] = + EntryPoint( + generate_deopt_entry_for(itos, i), + generate_deopt_entry_for(itos, i), + generate_deopt_entry_for(itos, i), + generate_deopt_entry_for(itos, i), + generate_deopt_entry_for(atos, i), + generate_deopt_entry_for(itos, i), + generate_deopt_entry_for(ltos, i), + generate_deopt_entry_for(ftos, i), + generate_deopt_entry_for(dtos, i), + generate_deopt_entry_for(vtos, i) + ); + } + } - { CodeletMark cm(_masm, "result handlers for native calls"); - // The various result converter stublets. - int is_generated[Interpreter::number_of_result_handlers]; - memset(is_generated, 0, sizeof(is_generated)); - - for (int i = 0; i < Interpreter::number_of_result_handlers; i++) { - BasicType type = types[i]; - if (!is_generated[Interpreter::BasicType_as_index(type)]++) { - Interpreter::_native_abi_to_tosca[Interpreter::BasicType_as_index(type)] = generate_result_handler_for(type); - } - } + { CodeletMark cm(_masm, "result handlers for native calls"); + // The various result converter stublets. 
+ int is_generated[Interpreter::number_of_result_handlers]; + memset(is_generated, 0, sizeof(is_generated)); + + for (int i = 0; i < Interpreter::number_of_result_handlers; i++) { + BasicType type = types[i]; + if (!is_generated[Interpreter::BasicType_as_index(type)]++) { + Interpreter::_native_abi_to_tosca[Interpreter::BasicType_as_index(type)] = generate_result_handler_for(type); } + } + } - { CodeletMark cm(_masm, "continuation entry points"); - Interpreter::_continuation_entry = - EntryPoint( - generate_continuation_for(btos), - generate_continuation_for(ztos), - generate_continuation_for(ctos), - generate_continuation_for(stos), - generate_continuation_for(atos), - generate_continuation_for(itos), - generate_continuation_for(ltos), - generate_continuation_for(ftos), - generate_continuation_for(dtos), - generate_continuation_for(vtos) - ); - } + { CodeletMark cm(_masm, "continuation entry points"); + Interpreter::_continuation_entry = + EntryPoint( + generate_continuation_for(btos), + generate_continuation_for(ztos), + generate_continuation_for(ctos), + generate_continuation_for(stos), + generate_continuation_for(atos), + generate_continuation_for(itos), + generate_continuation_for(ltos), + generate_continuation_for(ftos), + generate_continuation_for(dtos), + generate_continuation_for(vtos) + ); + } - { CodeletMark cm(_masm, "safepoint entry points"); - Interpreter::_safept_entry = - EntryPoint( - generate_safept_entry_for(btos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(ztos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(ctos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(stos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(atos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(itos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), 
- generate_safept_entry_for(ltos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(ftos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(dtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), - generate_safept_entry_for(vtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)) - ); - } + { CodeletMark cm(_masm, "safepoint entry points"); + Interpreter::_safept_entry = + EntryPoint( + generate_safept_entry_for(btos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(ztos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(ctos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(stos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(atos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(itos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(ltos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(ftos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(dtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)), + generate_safept_entry_for(vtos, CAST_FROM_FN_PTR(address, InterpreterRuntime::at_safepoint)) + ); + } - { CodeletMark cm(_masm, "exception handling"); - // (Note: this is not safepoint safe because thread may return to compiled code) - generate_throw_exception(); - } + { CodeletMark cm(_masm, "exception handling"); + // (Note: this is not safepoint safe because thread may return to compiled code) + generate_throw_exception(); + } - { CodeletMark cm(_masm, "throw exception entrypoints"); - Interpreter::_throw_ArrayIndexOutOfBoundsException_entry = generate_ArrayIndexOutOfBounds_handler("java/lang/ArrayIndexOutOfBoundsException"); - 
Interpreter::_throw_ArrayStoreException_entry = generate_klass_exception_handler("java/lang/ArrayStoreException" ); - Interpreter::_throw_ArithmeticException_entry = generate_exception_handler("java/lang/ArithmeticException" , "/ by zero"); - Interpreter::_throw_ClassCastException_entry = generate_ClassCastException_handler(); - Interpreter::_throw_NullPointerException_entry = generate_exception_handler("java/lang/NullPointerException" , NULL ); - Interpreter::_throw_StackOverflowError_entry = generate_StackOverflowError_handler(); - } + { CodeletMark cm(_masm, "throw exception entrypoints"); + Interpreter::_throw_ArrayIndexOutOfBoundsException_entry = generate_ArrayIndexOutOfBounds_handler("java/lang/ArrayIndexOutOfBoundsException"); + Interpreter::_throw_ArrayStoreException_entry = generate_klass_exception_handler("java/lang/ArrayStoreException" ); + Interpreter::_throw_ArithmeticException_entry = generate_exception_handler("java/lang/ArithmeticException" , "/ by zero"); + Interpreter::_throw_ClassCastException_entry = generate_ClassCastException_handler(); + Interpreter::_throw_NullPointerException_entry = generate_exception_handler("java/lang/NullPointerException" , NULL ); + Interpreter::_throw_StackOverflowError_entry = generate_StackOverflowError_handler(); + } #define method_entry(kind) \ - { CodeletMark cm(_masm, "method entry point (kind = " #kind ")"); \ - Interpreter::_entry_table[Interpreter::kind] = generate_method_entry(Interpreter::kind); \ - Interpreter::update_cds_entry_table(Interpreter::kind); \ - } + { CodeletMark cm(_masm, "method entry point (kind = " #kind ")"); \ + Interpreter::_entry_table[Interpreter::kind] = generate_method_entry(Interpreter::kind); \ + Interpreter::update_cds_entry_table(Interpreter::kind); \ + } - // all non-native method kinds - method_entry(zerolocals) - method_entry(zerolocals_synchronized) - method_entry(empty) - method_entry(accessor) - method_entry(abstract) - method_entry(java_lang_math_sin ) - 
method_entry(java_lang_math_cos ) - method_entry(java_lang_math_tan ) - method_entry(java_lang_math_abs ) - method_entry(java_lang_math_sqrt ) - method_entry(java_lang_math_log ) - method_entry(java_lang_math_log10) - method_entry(java_lang_math_exp ) - method_entry(java_lang_math_pow ) - method_entry(java_lang_math_fmaF ) - method_entry(java_lang_math_fmaD ) - method_entry(java_lang_ref_reference_get) - - AbstractInterpreter::initialize_method_handle_entries(); - - // all native method kinds (must be one contiguous block) - Interpreter::_native_entry_begin = Interpreter::code()->code_end(); - method_entry(native) - method_entry(native_synchronized) - Interpreter::_native_entry_end = Interpreter::code()->code_end(); - - method_entry(java_util_zip_CRC32_update) - method_entry(java_util_zip_CRC32_updateBytes) - method_entry(java_util_zip_CRC32_updateByteBuffer) - method_entry(java_util_zip_CRC32C_updateBytes) - method_entry(java_util_zip_CRC32C_updateDirectByteBuffer) - - method_entry(java_lang_Float_intBitsToFloat); - method_entry(java_lang_Float_floatToRawIntBits); - method_entry(java_lang_Double_longBitsToDouble); - method_entry(java_lang_Double_doubleToRawLongBits); + // all non-native method kinds + method_entry(zerolocals) + method_entry(zerolocals_synchronized) + method_entry(empty) + method_entry(accessor) + method_entry(abstract) + method_entry(java_lang_math_sin ) + method_entry(java_lang_math_cos ) + method_entry(java_lang_math_tan ) + method_entry(java_lang_math_abs ) + method_entry(java_lang_math_sqrt ) + method_entry(java_lang_math_log ) + method_entry(java_lang_math_log10) + method_entry(java_lang_math_exp ) + method_entry(java_lang_math_pow ) + method_entry(java_lang_math_fmaF ) + method_entry(java_lang_math_fmaD ) + method_entry(java_lang_ref_reference_get) + + AbstractInterpreter::initialize_method_handle_entries(); + + // all native method kinds (must be one contiguous block) + Interpreter::_native_entry_begin = Interpreter::code()->code_end(); + 
method_entry(native) + method_entry(native_synchronized) + Interpreter::_native_entry_end = Interpreter::code()->code_end(); + + method_entry(java_util_zip_CRC32_update) + method_entry(java_util_zip_CRC32_updateBytes) + method_entry(java_util_zip_CRC32_updateByteBuffer) + method_entry(java_util_zip_CRC32C_updateBytes) + method_entry(java_util_zip_CRC32C_updateDirectByteBuffer) + + method_entry(java_lang_Float_intBitsToFloat); + method_entry(java_lang_Float_floatToRawIntBits); + method_entry(java_lang_Double_longBitsToDouble); + method_entry(java_lang_Double_doubleToRawLongBits); #undef method_entry - // Bytecodes - set_entry_points_for_all_bytes(); - } - } while (CodeCacheExtensions::needs_other_interpreter_variant()); + // Bytecodes + set_entry_points_for_all_bytes(); // installation of code in other places in the runtime // (ExcutableCodeManager calls not needed to copy the entries) @@ -314,9 +307,6 @@ void TemplateInterpreterGenerator::set_entry_points(Bytecodes::Code code) { - if (CodeCacheExtensions::skip_template_interpreter_entries(code)) { - return; - } CodeletMark cm(_masm, Bytecodes::name(code), code); // initialize entry points assert(_unimplemented_bytecode != NULL, "should have been generated before"); @@ -347,7 +337,6 @@ EntryPoint entry(bep, zep, cep, sep, aep, iep, lep, fep, dep, vep); Interpreter::_normal_table.set_entry(code, entry); Interpreter::_wentry_point[code] = wep; - CodeCacheExtensions::completed_template_interpreter_entries(_masm, code); } --- old/hotspot/src/share/vm/memory/virtualspace.cpp 2016-12-02 11:15:50.381150970 -0500 +++ new/hotspot/src/share/vm/memory/virtualspace.cpp 2016-12-02 11:15:48.629051613 -0500 @@ -23,7 +23,6 @@ */ #include "precompiled.hpp" -#include "code/codeCacheExtensions.hpp" #include "logging/log.hpp" #include "memory/resourceArea.hpp" #include "memory/virtualspace.hpp" @@ -592,7 +591,7 @@ ReservedCodeSpace::ReservedCodeSpace(size_t r_size, size_t rs_align, bool large) : - ReservedSpace(r_size, rs_align, large, 
/*executable*/ CodeCacheExtensions::support_dynamic_code()) { + ReservedSpace(r_size, rs_align, large, /*executable*/ true) { MemTracker::record_virtual_memory_type((address)base(), mtCode); } --- old/hotspot/src/share/vm/precompiled/precompiled.hpp 2016-12-02 11:15:55.761456075 -0500 +++ new/hotspot/src/share/vm/precompiled/precompiled.hpp 2016-12-02 11:15:54.105362162 -0500 @@ -66,7 +66,6 @@ # include "classfile/vmSymbols.hpp" # include "code/codeBlob.hpp" # include "code/codeCache.hpp" -# include "code/codeCacheExtensions.hpp" # include "code/compressedStream.hpp" # include "code/debugInfo.hpp" # include "code/debugInfoRec.hpp" --- old/hotspot/src/share/vm/prims/methodHandles.cpp 2016-12-02 11:16:01.073757323 -0500 +++ new/hotspot/src/share/vm/prims/methodHandles.cpp 2016-12-02 11:15:59.509668628 -0500 @@ -26,7 +26,6 @@ #include "classfile/javaClasses.inline.hpp" #include "classfile/stringTable.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/dependencyContext.hpp" #include "compiler/compileBroker.hpp" #include "interpreter/interpreter.hpp" @@ -94,7 +93,6 @@ StubCodeMark mark(this, "MethodHandle::interpreter_entry", vmIntrinsics::name_at(iid)); address entry = MethodHandles::generate_method_handle_interpreter_entry(_masm, iid); if (entry != NULL) { - CodeCacheExtensions::handle_generated_pc(entry, vmIntrinsics::name_at(iid)); Interpreter::set_entry_for_kind(mk, entry); } // If the entry is not set, it will throw AbstractMethodError. 
--- old/hotspot/src/share/vm/runtime/arguments.cpp 2016-12-02 11:16:06.098042239 -0500 +++ new/hotspot/src/share/vm/runtime/arguments.cpp 2016-12-02 11:16:04.385945150 -0500 @@ -27,7 +27,6 @@ #include "classfile/javaAssertions.hpp" #include "classfile/stringTable.hpp" #include "classfile/symbolTable.hpp" -#include "code/codeCacheExtensions.hpp" #include "gc/shared/cardTableRS.hpp" #include "gc/shared/genCollectedHeap.hpp" #include "gc/shared/referenceProcessor.hpp" @@ -1877,7 +1876,6 @@ #endif // _LP64 #endif // !ZERO - CodeCacheExtensions::set_ergonomics_flags(); } void Arguments::set_parallel_gc_flags() { --- old/hotspot/src/share/vm/runtime/init.cpp 2016-12-02 11:16:11.266335320 -0500 +++ new/hotspot/src/share/vm/runtime/init.cpp 2016-12-02 11:16:09.526236644 -0500 @@ -25,7 +25,6 @@ #include "precompiled.hpp" #include "classfile/stringTable.hpp" #include "classfile/symbolTable.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/icBuffer.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/bytecodes.hpp" @@ -105,20 +104,15 @@ classLoader_init1(); compilationPolicy_init(); codeCache_init(); - CodeCacheExtensions::initialize(); VM_Version_init(); - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::VMVersion); os_init_globals(); stubRoutines_init1(); - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::StubRoutines1); jint status = universe_init(); // dependent on codeCache_init and // stubRoutines_init1 and metaspace_init. 
if (status != JNI_OK) return status; - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::Universe); interpreter_init(); // before any methods loaded - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::Interpreter); invocationCounter_init(); // before any methods loaded marksweep_init(); accessFlags_init(); @@ -148,7 +142,6 @@ javaClasses_init(); // must happen after vtable initialization stubRoutines_init2(); // note: StubRoutines need 2-phase init MethodHandles::generate_adapters(); - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::StubRoutines2); #if INCLUDE_NMT // Solaris stack is walkable only after stubRoutines are set up. @@ -162,7 +155,6 @@ CommandLineFlags::printFlags(tty, false, PrintFlagsRanges); } - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::InitGlobals); return JNI_OK; } --- old/hotspot/src/share/vm/runtime/sharedRuntime.cpp 2016-12-02 11:16:16.362624319 -0500 +++ new/hotspot/src/share/vm/runtime/sharedRuntime.cpp 2016-12-02 11:16:14.690529498 -0500 @@ -28,7 +28,6 @@ #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" #include "code/compiledIC.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/scopeDesc.hpp" #include "code/vtableStubs.hpp" #include "compiler/abstractCompiler.hpp" @@ -2567,27 +2566,15 @@ if (_adapters != NULL) return; _adapters = new AdapterHandlerTable(); - if (!CodeCacheExtensions::skip_compiler_support()) { - // Create a special handler for abstract methods. Abstract methods - // are never compiled so an i2c entry is somewhat meaningless, but - // throw AbstractMethodError just in case. - // Pass wrong_method_abstract for the c2i transitions to return - // AbstractMethodError for invalid invocations. 
- address wrong_method_abstract = SharedRuntime::get_handle_wrong_method_abstract_stub(); - _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL), - StubRoutines::throw_AbstractMethodError_entry(), - wrong_method_abstract, wrong_method_abstract); - } else { - // Adapters are not supposed to be used. - // Generate a special one to cause an error if used (and store this - // singleton in place of the useless _abstract_method_error adapter). - address entry = (address) &unexpected_adapter_call; - _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL), - entry, - entry, - entry); - - } + // Create a special handler for abstract methods. Abstract methods + // are never compiled so an i2c entry is somewhat meaningless, but + // throw AbstractMethodError just in case. + // Pass wrong_method_abstract for the c2i transitions to return + // AbstractMethodError for invalid invocations. + address wrong_method_abstract = SharedRuntime::get_handle_wrong_method_abstract_stub(); + _abstract_method_handler = AdapterHandlerLibrary::new_entry(new AdapterFingerPrint(0, NULL), + StubRoutines::throw_AbstractMethodError_entry(), + wrong_method_abstract, wrong_method_abstract); } AdapterHandlerEntry* AdapterHandlerLibrary::new_entry(AdapterFingerPrint* fingerprint, @@ -2638,17 +2625,6 @@ // make sure data structure is initialized initialize(); - // during dump time, always generate adapters, even if the - // compiler has been turned off. - if (!DumpSharedSpaces && CodeCacheExtensions::skip_compiler_support()) { - // adapters are useless and should not be used, including the - // abstract_method_handler. However, some callers check that - // an adapter was installed. - // Return the singleton adapter, stored into _abstract_method_handler - // and modified to cause an error if we ever call it. 
- return _abstract_method_handler; - } - if (method->is_abstract()) { return _abstract_method_handler; } --- old/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp 2016-12-02 11:16:21.866936456 -0500 +++ new/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp 2016-12-02 11:16:20.214842770 -0500 @@ -26,7 +26,6 @@ #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "compiler/disassembler.hpp" #include "oops/oop.inline.hpp" #include "prims/forte.hpp" --- old/hotspot/src/share/vm/runtime/stubRoutines.cpp 2016-12-02 11:16:27.099233167 -0500 +++ new/hotspot/src/share/vm/runtime/stubRoutines.cpp 2016-12-02 11:16:25.495142200 -0500 @@ -24,7 +24,6 @@ #include "precompiled.hpp" #include "asm/codeBuffer.hpp" -#include "code/codeCacheExtensions.hpp" #include "memory/resourceArea.hpp" #include "oops/oop.inline.hpp" #include "runtime/interfaceSupport.hpp" @@ -204,12 +203,6 @@ // simple tests of generated arraycopy functions static void test_arraycopy_func(address func, int alignment) { - if (CodeCacheExtensions::use_pregenerated_interpreter() || !CodeCacheExtensions::is_executable(func)) { - // Exit safely if stubs were generated but cannot be used. 
- // Also excluding pregenerated interpreter since the code may depend on - // some registers being properly initialized (for instance Rthread) - return; - } int v = 0xcc; int v2 = 0x11; jlong lbuffer[8]; --- old/hotspot/src/share/vm/runtime/thread.cpp 2016-12-02 11:16:33.847615848 -0500 +++ new/hotspot/src/share/vm/runtime/thread.cpp 2016-12-02 11:16:31.635490404 -0500 @@ -29,7 +29,6 @@ #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "code/scopeDesc.hpp" #include "compiler/compileBroker.hpp" #include "compiler/compileTask.hpp" @@ -3842,8 +3841,6 @@ } } - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::CreateVM); - create_vm_timer.end(); #ifdef ASSERT _vm_complete = true; --- old/hotspot/src/share/vm/runtime/vm_operations.cpp 2016-12-02 11:16:38.975906660 -0500 +++ new/hotspot/src/share/vm/runtime/vm_operations.cpp 2016-12-02 11:16:37.363815242 -0500 @@ -26,7 +26,6 @@ #include "classfile/symbolTable.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" -#include "code/codeCacheExtensions.hpp" #include "compiler/compileBroker.hpp" #include "gc/shared/isGCActiveMark.hpp" #include "logging/log.hpp" @@ -390,7 +389,6 @@ Thread * VM_Exit::_shutdown_thread = NULL; int VM_Exit::set_vm_exited() { - CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::LastStep); Thread * thr_cur = Thread::current(); --- old/hotspot/src/share/vm/runtime/vm_version.cpp 2016-12-02 11:16:43.996191347 -0500 +++ new/hotspot/src/share/vm/runtime/vm_version.cpp 2016-12-02 11:16:42.380099706 -0500 @@ -23,7 +23,6 @@ */ #include "precompiled.hpp" -#include "code/codeCacheExtensions.hpp" #include "logging/log.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" @@ -127,9 +126,6 @@ const char* Abstract_VM_Version::vm_info_string() { - if (CodeCacheExtensions::use_pregenerated_interpreter()) { - return "interpreted mode, pregenerated"; - } switch 
(Arguments::mode()) { case Arguments::_int: return UseSharedSpaces ? "interpreted mode, sharing" : "interpreted mode"; --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/abstractInterpreter_arm.cpp 2016-12-02 11:16:47.768405262 -0500 @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecode.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/method.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/synchronizer.hpp"
+#include "utilities/macros.hpp"
+// Maps a BasicType to an index i with 0 <= i < number_of_result_handlers (asserted below).
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+  switch (type) {
+#ifdef AARCH64
+    case T_BOOLEAN: i = 0; break;  // AARCH64 table: indices 0..5, several types share an index
+    case T_CHAR : i = 1; break;
+    case T_BYTE : i = 2; break;
+    case T_SHORT : i = 3; break;
+    case T_INT : // fall through
+    case T_LONG : // fall through
+    case T_VOID : // fall through
+    case T_FLOAT : // fall through
+    case T_DOUBLE : i = 4; break;
+    case T_OBJECT : // fall through
+    case T_ARRAY : i = 5; break;
+#else
+    case T_VOID : i = 0; break;  // non-AARCH64 table: indices 0..9, only OBJECT/ARRAY share one
+    case T_BOOLEAN: i = 1; break;
+    case T_CHAR : i = 2; break;
+    case T_BYTE : i = 3; break;
+    case T_SHORT : i = 4; break;
+    case T_INT : i = 5; break;
+    case T_OBJECT : // fall through
+    case T_ARRAY : i = 6; break;
+    case T_LONG : i = 7; break;
+    case T_FLOAT : i = 8; break;
+    case T_DOUBLE : i = 9; break;
+#endif // AARCH64
+    default : ShouldNotReachHere();  // any other BasicType is a caller error
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return i;
+}
+
+// These should never be compiled since the interpreter will prefer
+// the compiled version to the intrinsic version.
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  switch (method_kind(m)) {  // false for the math intrinsic kinds listed here, true otherwise
+    case Interpreter::java_lang_math_sin : // fall thru
+    case Interpreter::java_lang_math_cos : // fall thru
+    case Interpreter::java_lang_math_tan : // fall thru
+    case Interpreter::java_lang_math_abs : // fall thru
+    case Interpreter::java_lang_math_log : // fall thru
+    case Interpreter::java_lang_math_log10 : // fall thru
+    case Interpreter::java_lang_math_sqrt :
+      return false;
+    default:
+      return true;
+  }
+}
+
+// How much stack a method activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+  const int stub_code = AARCH64_ONLY(24) NOT_AARCH64(12); // see generate_call_stub
+  // Save space for one monitor to get into the interpreted method in case
+  // the method is synchronized
+  int monitor_size = method->is_synchronized() ?
+                       1*frame::interpreter_frame_monitor_size() : 0;
+
+  // total overhead size: monitor_size + (sender SP, thru expr stack bottom).
+  // be sure to change this if you add/subtract anything to/from the overhead area
+  const int overhead_size = monitor_size +
+                            (frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset);
+  const int method_stack = (method->max_locals() + method->max_stack()) *
+                           Interpreter::stackElementWords;
+  return overhead_size + method_stack + stub_code;
+}
+
+// asm based interpreter deoptimization helpers; size_activation does not read max_stack or is_top_frame
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int tempcount,
+                                         int extra_args,
+                                         int moncount,
+                                         int callee_param_count,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in TemplateInterpreterGenerator::generate_fixed_frame.
+  // fixed size of an interpreter frame:
+  int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset;
+
+  // Our locals were accounted for by the caller (or last_frame_adjust on the transition)
+  // Since the callee parameters already account for the callee's params we only need to account for
+  // the extra locals.
+
+  int size = overhead +
+             ((callee_locals - callee_param_count)*Interpreter::stackElementWords) +
+             (moncount*frame::interpreter_frame_monitor_size()) +
+             tempcount*Interpreter::stackElementWords + extra_args;
+
+#ifdef AARCH64
+  size = round_to(size, StackAlignmentInBytes/BytesPerWord);  // round up to the stack alignment expressed in words
+#endif // AARCH64
+
+  return size;
+}
+
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,
+                                            int callee_locals,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
+  // Fills in interpreter_frame: method, locals, monitor end, stack top, sender_sp, cache and mirror.
+  // Set up the method, locals, and monitors.
+  // The frame interpreter_frame is guaranteed to be the right size,
+  // as determined by a previous call to the size_activation() method.
+  // It is also guaranteed to be walkable even though it is in a skeletal state
+  // NOTE: sizes below are in words not bytes (this function itself returns void)
+
+  // sizes, in words, of all locals and of the locals beyond the parameters:
+  int max_locals = method->max_locals() * Interpreter::stackElementWords;
+  int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords;
+
+#ifdef ASSERT
+  assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable");
+#endif
+
+  interpreter_frame->interpreter_frame_set_method(method);
+  // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
+  // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+  // and sender_sp is (fp + sender_sp_offset*wordSize)
+
+#ifdef AARCH64
+  intptr_t* locals;
+  if (caller->is_interpreted_frame()) {
+    // attach locals to the expression stack of caller interpreter frame
+    locals = caller->interpreter_frame_tos_address() + caller_actual_parameters*Interpreter::stackElementWords - 1;
+  } else {
+    assert (is_bottom_frame, "should be");
+    locals = interpreter_frame->fp() + frame::sender_sp_offset + method->max_locals() - 1;
+  }
+
+  if (TraceDeoptimization) {
+    tty->print_cr("layout_activation:");
+
+    if (caller->is_entry_frame()) {
+      tty->print("entry ");
+    }
+    if (caller->is_compiled_frame()) {
+      tty->print("compiled ");
+    }
+    if (caller->is_interpreted_frame()) {
+      tty->print("interpreted ");
+    }
+    tty->print_cr("caller: sp=%p, unextended_sp=%p, fp=%p, pc=%p", caller->sp(), caller->unextended_sp(), caller->fp(), caller->pc());
+    tty->print_cr("interpreter_frame: sp=%p, unextended_sp=%p, fp=%p, pc=%p", interpreter_frame->sp(), interpreter_frame->unextended_sp(), interpreter_frame->fp(), interpreter_frame->pc());
+    tty->print_cr("method: max_locals = %d, size_of_parameters = %d", method->max_locals(), method->size_of_parameters());
+    tty->print_cr("caller_actual_parameters = %d", caller_actual_parameters);
+    tty->print_cr("locals = %p", locals);
+  }
+
+#ifdef ASSERT
+  if (caller_actual_parameters != method->size_of_parameters()) {
+    assert(caller->is_interpreted_frame(), "adjusted caller_actual_parameters, but caller is not interpreter frame");
+    Bytecode_invoke inv(caller->interpreter_frame_method(), caller->interpreter_frame_bci());
+
+    if (is_bottom_frame) {
+      assert(caller_actual_parameters == 0, "invalid adjusted caller_actual_parameters value for bottom frame");
+      assert(inv.is_invokedynamic() || inv.is_invokehandle(), "adjusted caller_actual_parameters for bottom frame, but not invokedynamic/invokehandle");
+    } else {
+      assert(caller_actual_parameters == method->size_of_parameters()+1, "invalid adjusted caller_actual_parameters value");
+      assert(!inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name()), "adjusted caller_actual_parameters, but no member arg");
+    }
+  }
+  if (caller->is_interpreted_frame()) {
+    intptr_t* locals_base = (locals - method->max_locals()*Interpreter::stackElementWords + 1);
+    locals_base = (intptr_t*)round_down((intptr_t)locals_base, StackAlignmentInBytes);
+    assert(interpreter_frame->sender_sp() <= locals_base, "interpreter-to-interpreter frame chaining");
+
+  } else if (caller->is_compiled_frame()) {
+    assert(locals + 1 <= caller->unextended_sp(), "compiled-to-interpreter frame chaining");
+
+  } else {
+    assert(caller->is_entry_frame(), "should be");
+    assert(locals + 1 <= caller->fp(), "entry-to-interpreter frame chaining");
+  }
+#endif // ASSERT
+
+#else
+  intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
+#endif // AARCH64
+
+  interpreter_frame->interpreter_frame_set_locals(locals);
+  BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+  BasicObjectLock* monbot = montop - moncount;  // monitor end lies moncount entries below monitor begin
+  interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+
+  // Set last_sp
+  intptr_t* stack_top = (intptr_t*) monbot -
+    tempcount*Interpreter::stackElementWords -
+    popframe_extra_args;
+#ifdef AARCH64
+  interpreter_frame->interpreter_frame_set_stack_top(stack_top);
+
+  intptr_t* extended_sp = (intptr_t*) monbot -
+    (method->max_stack() + 1) * Interpreter::stackElementWords - // +1 is reserved slot for exception handler
+    popframe_extra_args;
+  extended_sp = (intptr_t*)round_down((intptr_t)extended_sp, StackAlignmentInBytes);
+  interpreter_frame->interpreter_frame_set_extended_sp(extended_sp);
+#else
+  interpreter_frame->interpreter_frame_set_last_sp(stack_top);
+#endif // AARCH64
+
+  // All frames but the initial (oldest) interpreter frame we fill in have a
+  // value for sender_sp that allows walking the stack but isn't
+  // truly correct. Correct the value here.
+
+#ifdef AARCH64
+  if (caller->is_interpreted_frame()) {
+    intptr_t* sender_sp = (intptr_t*)round_down((intptr_t)caller->interpreter_frame_tos_address(), StackAlignmentInBytes);
+    interpreter_frame->set_interpreter_frame_sender_sp(sender_sp);
+
+  } else {
+    // in case of non-interpreter caller sender_sp of the oldest frame is already
+    // set to valid value
+  }
+#else
+  if (extra_locals != 0 &&
+      interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) {
+    interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals);
+  }
+#endif // AARCH64
+
+  *interpreter_frame->interpreter_frame_cache_addr() =
+    method->constants()->cache();
+  *interpreter_frame->interpreter_frame_mirror_addr() =
+    method->method_holder()->java_mirror();
+}
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/arm.ad 2016-12-02 11:16:52.812691310 -0500
@@ -0,0 +1,14428 @@
+//
+// Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+// ARM Architecture Description File
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def <name> ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define <name> (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+// The default cost (of an ALU instruction).
+  int_def DEFAULT_COST ( 100, 100);
+  int_def HUGE_COST (1000000, 1000000);
+
+// Memory refs are twice as expensive as run-of-the-mill.
+  int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
+
+// Branches are even more expensive.
+  int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
+  int_def CALL_COST ( 300, DEFAULT_COST * 3);
+%}
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+source_hpp %{
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ %} and source_hpp %{ %} freely as needed.
+
+// Does destination need to be loaded in a register then passed to a
+// branch instruction?
+extern bool maybe_far_call(const CallNode *n);
+extern bool maybe_far_call(const MachCallNode *n);
+static inline bool cache_reachable() {  // thin wrapper over MacroAssembler::_cache_fully_reachable()
+  return MacroAssembler::_cache_fully_reachable();
+}
+
+#ifdef AARCH64
+#define ldr_32 ldr_w
+#define str_32 str_w
+#else
+#define ldr_32 ldr
+#define str_32 str
+#define tst_32 tst
+#define teq_32 teq
+#endif
+#if 1
+extern bool PrintOptoAssembly;
+#endif
+
+class c2 {
+public:
+  static OptoRegPair return_value(int ideal_reg);  // declared only; the definition is not in this part of the file
+};
+
+class CallStubImpl {
+
+  //--------------------------------------------------------------
+  //---< Used for optimization in Compile::Shorten_branches >---
+  //--------------------------------------------------------------
+
+ public:
+  // Size of call trampoline stub.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // number of relocations needed by a call trampoline stub
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+class HandlerImpl {
+
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+#ifdef AARCH64
+    // ldr_literal; br; (pad);
+    return 3 * Assembler::InstructionSize + wordSize;
+#else
+    return ( 3 * 4 );  // presumably three 4-byte instructions -- confirm against emit_exception_handler
+#endif
+  }
+
+
+  static uint size_deopt_handler() {
+    return ( 9 * 4 );  // presumably nine 4-byte instructions -- confirm against emit_deopt_handler
+  }
+
+};
+
+%}
+
+source %{
+#define __ _masm.
+
+static FloatRegister reg_to_FloatRegister_object(int register_encoding);
+static Register reg_to_register_object(int register_encoding);
+
+
+// ****************************************************************************
+
+// REQUIRED FUNCTIONALITY
+
+// Indicate if the safepoint node needs the polling page as an input.
+// Since ARM does not have absolute addressing, it does.
+bool SafePointNode::needs_polling_address_input() {
+  return true;
+}
+
+// emit an interrupt that is caught by the debugger (for debugging compiler)
+void emit_break(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf);
+  __ breakpoint();
+}
+
+#ifndef PRODUCT
+void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
+  st->print("TA");
+}
+#endif
+
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  emit_break(cbuf);  // same breakpoint sequence as the free function above
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);  // delegates to MachNode::size
+}
+
+// Emits a single nop into cbuf.
+void emit_nop(CodeBuffer &cbuf) {
+  MacroAssembler _masm(&cbuf);
+  __ nop();
+}
+
+// Emits a call to m->method() using relocation rspec; asserts the emitted return-address offset equals n's ret_addr_offset().
+void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) {
+  int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset();
+  int call_site_offset = cbuf.insts()->mark_off();
+  MacroAssembler _masm(&cbuf);
+  __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call
+  address target = (address)m->method();
+  assert(n->as_MachCall()->entry_point() == target, "sanity");
+  assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity");
+  assert(cache_reachable() == __ cache_fully_reachable(), "sanity");
+
+  assert(target != NULL, "need real address");
+
+  int ret_addr_offset = -1;
+  if (rspec.type() == relocInfo::runtime_call_type) {
+    __ call(target, rspec);
+    ret_addr_offset = __ offset();  // return address is the offset right after the call
+  } else {
+    // scratches Rtemp
+    ret_addr_offset = __ patchable_call(target, rspec, true);
+  }
+  assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()");
+}
+
+//============================================================================= +// REQUIRED FUNCTIONALITY for encoding +void emit_lo(CodeBuffer &cbuf, int val) { } +void emit_hi(CodeBuffer &cbuf, int val) { } + + +//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { +#ifdef AARCH64 + return 0; +#else + int offset = -(size() / 2); + // flds, fldd: 8-bit offset multiplied by 4: +/- 1024 + // ldr, ldrb : 12-bit offset: +/- 4096 + if (!Assembler::is_simm10(offset)) { + offset = Assembler::min_simm10(); + } + return offset; +#endif +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register r = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = __ code()->consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size); + + // Materialize the constant table base. + address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); + RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); + __ mov_address(r, baseaddr, rspec); +} + +uint MachConstantBaseNode::size(PhaseRegAlloc*) const { +#ifdef AARCH64 + return 5 * Assembler::InstructionSize; +#else + return 8; +#endif +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + char reg[128]; + ra_->dump_register(this, reg); + st->print("MOV_SLOW &constanttable,%s\t! 
constant table base", reg); +} +#endif + +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + Compile* C = ra_->C; + + for (int i = 0; i < OptoPrologueNops; i++) { + st->print_cr("NOP"); st->print("\t"); + } +#ifdef AARCH64 + if (OptoPrologueNops <= 0) { + st->print_cr("NOP\t! required for safe patching"); + st->print("\t"); + } +#endif + + size_t framesize = C->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + int bangsize = C->bang_size_in_bytes(); + // Remove two words for return addr and rbp, + framesize -= 2*wordSize; + bangsize -= 2*wordSize; + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t"); + } + st->print_cr("PUSH R_FP|R_LR_LR"); st->print("\t"); + if (framesize != 0) { + st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize); + } +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + for (int i = 0; i < OptoPrologueNops; i++) { + __ nop(); + } +#ifdef AARCH64 + if (OptoPrologueNops <= 0) { + __ nop(); // required for safe patching by patch_verified_entry() + } +#endif + + size_t framesize = C->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + int bangsize = C->bang_size_in_bytes(); + // Remove two words for return addr and fp, + framesize -= 2*wordSize; + bangsize -= 2*wordSize; + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. 
But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + __ arm_stack_overflow_check(bangsize, Rtemp); + } + + __ raw_push(FP, LR); + if (framesize != 0) { + __ sub_slow(SP, SP, framesize); + } + + // offset from scratch buffer is not valid + if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) { + C->set_frame_complete( __ offset() ); + } + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. + Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +int MachPrologNode::reloc() const { + return 10; // a large enough number +} + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + Compile* C = ra_->C; + + size_t framesize = C->frame_size_in_bytes(); + framesize -= 2*wordSize; + + if (framesize != 0) { + st->print("ADD R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize); + } + st->print("POP R_FP|R_LR_LR"); + + if (do_polling() && ra_->C->is_method_compilation()) { + st->print("\n\t"); +#ifdef AARCH64 + if (MacroAssembler::page_reachable_from_cache(os::get_polling_page())) { + st->print("ADRP Rtemp, #PollAddr\t! Load Polling address\n\t"); + st->print("LDR ZR,[Rtemp + #PollAddr & 0xfff]\t!Poll for Safepointing"); + } else { + st->print("mov_slow Rtemp, #PollAddr\t! Load Polling address\n\t"); + st->print("LDR ZR,[Rtemp]\t!Poll for Safepointing"); + } +#else + st->print("MOV Rtemp, #PollAddr\t! 
Load Polling address\n\t"); + st->print("LDR Rtemp,[Rtemp]\t!Poll for Safepointing"); +#endif + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + Compile* C = ra_->C; + + size_t framesize = C->frame_size_in_bytes(); + framesize -= 2*wordSize; + if (framesize != 0) { + __ add_slow(SP, SP, framesize); + } + __ raw_pop(FP, LR); + + // If this does safepoint polling, then do it here + if (do_polling() && ra_->C->is_method_compilation()) { +#ifdef AARCH64 + if (false && MacroAssembler::page_reachable_from_cache(os::get_polling_page())) { +/* FIXME: TODO + __ relocate(relocInfo::xxx); + __ adrp(Rtemp, (intptr_t)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + int offset = os::get_polling_page() & 0xfff; + __ ldr(ZR, Address(Rtemp + offset)); +*/ + } else { + __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference); + __ relocate(relocInfo::poll_return_type); + __ ldr(ZR, Address(Rtemp)); + } +#else + // mov_slow here is usually one or two instruction + __ mov_address(Rtemp, (address)os::get_polling_page(), symbolic_Relocation::polling_page_reference); + __ relocate(relocInfo::poll_return_type); + __ ldr(Rtemp, Address(Rtemp)); +#endif + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { +#ifdef AARCH64 + // allow for added alignment nop from mov_address bind_literal + return MachNode::size(ra_) + 1 * Assembler::InstructionSize; +#else + return MachNode::size(ra_); +#endif +} + +int MachEpilogNode::reloc() const { + return 16; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { + assert( do_polling(), "no return for this epilog node"); + // return MacroAssembler::size_of_sethi(os::get_polling_page()); + Unimplemented(); + return 0; +} + 
+//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if (!OptoReg::is_valid(reg)) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) { +#ifdef AARCH64 + return is_memoryHD(offset); +#else + int rlo = Matcher::_regEncode[src_first]; + int rhi = Matcher::_regEncode[src_second]; + if (!((rlo&1)==0 && (rlo+1 == rhi))) { + tty->print_cr("CAUGHT BAD LDRD/STRD"); + } + return (rlo&1)==0 && (rlo+1 == rhi) && is_memoryHD(offset); +#endif +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, + PhaseRegAlloc *ra_, + bool do_size, + outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! + int size = 0; + + if (src_first == dst_first && src_second == dst_second) + return size; // Self copy, no move + +#ifdef TODO + if (bottom_type()->isa_vect() != NULL) { + } +#endif + + // Shared code does not expect instruction set capability based bailouts here. + // Handle offset unreachable bailout with minimal change in shared code. + // Bailout only for real instruction emit. 
+ // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case ) + + MacroAssembler _masm(cbuf); + + // -------------------------------------- + // Check for mem-mem move. Load into unused float registers and fall into + // the float-store case. + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + src_first = OptoReg::Name(R_mem_copy_lo_num); + src_second = OptoReg::Name(R_mem_copy_hi_num); + src_first_rc = rc_float; + src_second_rc = rc_float; + if (cbuf) { + __ ldr_double(Rmemcopy, Address(SP, offset)); + } else if (!do_size) { + st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset); + } + } else { + src_first = OptoReg::Name(R_mem_copy_lo_num); + src_first_rc = rc_float; + if (cbuf) { + __ ldr_float(Rmemcopy, Address(SP, offset)); + } else if (!do_size) { + st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(src_first),offset); + } + } + size += 4; + } + } + + if (src_second_rc == rc_stack && dst_second_rc == rc_stack) { + Unimplemented(); + } + + // -------------------------------------- + // Check for integer reg-reg copy + if (src_first_rc == rc_int && dst_first_rc == rc_int) { + // Else normal reg-reg copy + assert( src_second != dst_first, "smashed second before evacuating it" ); + if (cbuf) { + __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print("MOV R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } +#ifdef AARCH64 + if (src_first+1 == src_second && dst_first+1 == dst_second) { + return size + 4; + } +#endif + size += 4; + } + + // Check for integer store + if (src_first_rc == rc_int && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + if (cbuf && !is_memoryI(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + if (cbuf) { + __ str_64(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ str_32(reg_to_register_object(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_32 " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(src_first), offset); +#endif + } + } + } + size += 4; + } + + // Check for integer load + if (dst_first_rc == rc_int && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryI(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + if (cbuf) { + __ ldr_64(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ ldr_32(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_32 " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(dst_first), offset); +#endif + } + } + } + size += 4; + } + + // Check for float reg-reg copy + if (src_first_rc == rc_float && dst_first_rc == rc_float) { + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + if (cbuf) { + __ mov_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print(MOV_DOUBLE " R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 4; + } + if (cbuf) { + __ mov_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print(MOV_FLOAT " R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + size = 4; + } + + // Check for float store + if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + // Further check for aligned-adjacent pair, so we can use a double store + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous"); + if (cbuf) { + __ str_double(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_DOUBLE " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(src_first),offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ str_float(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_FLOAT " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset); +#endif + } + } + } + size += 4; + } + + // Check for float load + if (dst_first_rc == rc_float && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + // Further check for aligned-adjacent pair, so we can use a double store + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous"); + if (cbuf) { + __ ldr_double(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ ldr_float(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(dst_first),offset); +#endif + } + } + } + size += 4; + } + + // check for int reg -> float reg move + if (src_first_rc == rc_int && dst_first_rc == rc_float) { + // Further check for aligned-adjacent pair, so we can use a single instruction + if (src_second_rc != rc_bad) { + assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous"); + assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported"); + if (cbuf) { +#ifdef AARCH64 + __ fmov_dx(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); +#else + __ fmdrr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second])); +#endif +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); +#ifdef AARCH64 + st->print("FMOV_DX R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#else + st->print("FMDRR R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second)); +#endif +#endif + } + return size + 4; + } else { + if (cbuf) { + __ fmsr(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(FMSR " R_%s, R_%s\t! 
spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#endif + } + size += 4; + } + } + + // check for float reg -> int reg move + if (src_first_rc == rc_float && dst_first_rc == rc_int) { + // Further check for aligned-adjacent pair, so we can use a single instruction + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous"); + assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported"); + if (cbuf) { +#ifdef AARCH64 + __ fmov_xd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#else + __ fmrrd(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#endif +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); +#ifdef AARCH64 + st->print("FMOV_XD R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#else + st->print("FMRRD R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first)); +#endif +#endif + } + return size + 4; + } else { + if (cbuf) { + __ fmrs(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(FMRS " R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#endif + } + size += 4; + } + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. 
+ if (src_second == dst_second) + return size; // Self copy; no move + assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); + +#ifndef AARCH64 + // Check for integer reg-reg copy. Hi bits are stuck up in the top + // 32-bits of a 64-bit register, but are needed in low bits of another + // register (else it's a hi-bits-to-hi-bits copy which should have + // happened already as part of a 64-bit move) + if (src_second_rc == rc_int && dst_second_rc == rc_int) { + if (cbuf) { + __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("MOV R_%s, R_%s\t# spill high", + Matcher::regName[dst_second], + Matcher::regName[src_second]); +#endif + } + return size+4; + } + + // Check for high word integer store + if (src_second_rc == rc_int && dst_second_rc == rc_stack) { + int offset = ra_->reg2offset(dst_second); + + if (cbuf && !is_memoryP(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (cbuf) { + __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("STR R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset); +#endif + } + } + return size + 4; + } + + // Check for high word integer load + if (dst_second_rc == rc_int && src_second_rc == rc_stack) { + int offset = ra_->reg2offset(src_second); + if (cbuf && !is_memoryP(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (cbuf) { + __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(SP, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("LDR R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(dst_second), offset); +#endif + } + } + return size + 4; + } +#endif + + Unimplemented(); + return 0; // Mute compiler +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return implementation( NULL, ra_, true, NULL ); +} + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + for(int i = 0; i < _count; i += 1) { + __ nop(); + } +} + +uint MachNopNode::size(PhaseRegAlloc *ra_) const { + return 4 * _count; +} + + +//============================================================================= +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADD %s,R_SP+#%d",Matcher::regName[reg], offset); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + Register dst = reg_to_register_object(reg); + + if (is_aimm(offset)) { + __ add(dst, SP, offset); + } else { + __ mov_slow(dst, offset); +#ifdef AARCH64 + __ add(dst, SP, dst, ex_lsl); +#else + __ add(dst, SP, dst); +#endif + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_) + assert(ra_ == ra_->C->regalloc(), "sanity"); + 
return ra_->C->scratch_emit_size(this); +} + +//============================================================================= +#ifndef PRODUCT +#ifdef AARCH64 +#define R_RTEMP "R_R16" +#else +#define R_RTEMP "R_R12" +#endif +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + st->print_cr("\nUEP:"); + if (UseCompressedClassPointers) { + st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check"); + st->print_cr("\tdecode_klass " R_RTEMP); + } else { + st->print_cr("\tLDR " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check"); + } + st->print_cr("\tCMP " R_RTEMP ",R_R8" ); + st->print ("\tB.NE SharedRuntime::handle_ic_miss_stub"); +} +#endif + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + Register iCache = reg_to_register_object(Matcher::inline_cache_reg_encode()); + assert(iCache == Ricklass, "should be"); + Register receiver = R0; + + __ load_klass(Rtemp, receiver); + __ cmp(Rtemp, iCache); +#ifdef AARCH64 + Label match; + __ b(match, eq); + __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); + __ bind(match); +#else + __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne); +#endif +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= + +// Emit exception handler code. 
+// Returns the code offset at which the handler starts, or 0 (after recording
+// a "CodeCache is full" failure) when start_a_stub() returns NULL.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(size_exception_handler());
+  if (base == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+
+  // OK to trash LR, because exception blob will kill it
+  __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, LR_tmp);
+
+  // The emitted code must fit into the space reserved by start_a_stub().
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit deopt handler code.
+// Stores the address of the handler itself (deopt_pc) into a stack slot and
+// jumps to the deopt blob's unpack entry.  Returns the code offset at which
+// the handler starts, or 0 (after recording a "CodeCache is full" failure)
+// when start_a_stub() returns NULL.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  // Can't use any of the current frame's registers as we may have deopted
+  // at a poll and everything can be live.
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(size_deopt_handler());
+  if (base == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+  // Address of the first handler instruction; this is the value saved as the deopt PC.
+  address deopt_pc = __ pc();
+
+#ifdef AARCH64
+  // See LR saved by caller in sharedRuntime_arm.cpp
+  // see also hse1 ws
+  // see also LIR_Assembler::emit_deopt_handler
+
+  __ raw_push(LR, LR); // preserve LR in both slots
+  __ mov_relative_address(LR, deopt_pc);
+  __ str(LR, Address(SP, 1 * wordSize)); // save deopt PC
+  // OK to kill LR, because deopt blob will restore it from SP[0]
+  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, LR_tmp);
+#else
+  __ sub(SP, SP, wordSize); // make room for saved PC
+  __ push(LR); // save LR that may be live when we get here
+  __ mov_relative_address(LR, deopt_pc);
+  __ str(LR, Address(SP, wordSize)); // save deopt PC
+  __ pop(LR); // restore LR
+  __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg);
+#endif
+
+  // The emitted code must fit into the space reserved by start_a_stub().
+  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+
+  __ end_a_stub();
+  return offset;
+}
+
+// Reports whether a match rule for the given ideal opcode may be used on the
+// current CPU.  Returns false when no match rule exists, false for the
+// PopCount opcodes when UsePopCountInstruction is off, and for the vector
+// opcodes listed below whatever VM_Version::has_simd()/has_vfp() report.
+// Every other opcode that has a match rule is reported as supported.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode))
+    return false;
+
+  switch (opcode) {
+  case Op_PopCountI:
+  case Op_PopCountL:
+    if (!UsePopCountInstruction)
+      return false;
+    break;
+  // Vector opcodes gated on has_simd() alone.
+  case Op_LShiftCntV:
+  case Op_RShiftCntV:
+  case Op_AddVB:
+  case Op_AddVS:
+  case Op_AddVI:
+  case Op_AddVL:
+  case Op_SubVB:
+  case Op_SubVS:
+  case Op_SubVI:
+  case Op_SubVL:
+  case Op_MulVS:
+  case Op_MulVI:
+  case Op_LShiftVB:
+  case Op_LShiftVS:
+  case Op_LShiftVI:
+  case Op_LShiftVL:
+  case Op_RShiftVB:
+  case Op_RShiftVS:
+  case Op_RShiftVI:
+  case Op_RShiftVL:
+  case Op_URShiftVB:
+  case Op_URShiftVS:
+  case Op_URShiftVI:
+  case Op_URShiftVL:
+  case Op_AndV:
+  case Op_OrV:
+  case Op_XorV:
+    return VM_Version::has_simd();
+  // Vector load/store and float vector arithmetic: has_simd() on AARCH64,
+  // has_vfp() or has_simd() otherwise.
+  case Op_LoadVector:
+  case Op_StoreVector:
+  case Op_AddVF:
+  case Op_SubVF:
+  case Op_MulVF:
+#ifdef AARCH64
+    return VM_Version::has_simd();
+#else
+    return VM_Version::has_vfp() || VM_Version::has_simd();
+#endif
+  // Double vector arithmetic and vector division: has_simd() on AARCH64,
+  // has_vfp() otherwise.
+  case Op_AddVD:
+  case Op_SubVD:
+  case Op_MulVD:
+  case Op_DivVF:
+  case Op_DivVD:
+#ifdef AARCH64
+    return VM_Version::has_simd();
+#else
+    return VM_Version::has_vfp();
+#endif
+  }
+
+  return true;  // Per default match rules are supported.
+}
+
+// Vector-length-aware variant of match_rule_supported().  vlen is currently
+// ignored: the result is exactly match_rule_supported(opcode).
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
+// Always false here.
+const bool Matcher::has_predicated_vectors(void) {
+  return false;
+}
+
+// Returns the default float register pressure threshold unchanged.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
+int Matcher::regnum_to_fpu_offset(int regnum) {
+  return regnum - 32; // The FP registers are in the second chunk
+}
+
+// Vector width in bytes
+// The element type is ignored; the width is always MaxVectorSize.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  return MaxVectorSize;
+}
+
+// Vector ideal reg corresponding to specified size in bytes
+// Only sizes 8 (Op_VecD) and 16 (Op_VecX) are handled; any other size hits
+// ShouldNotReachHere().
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  switch(size) {
+    case  8: return Op_VecD;
+    case 16: return Op_VecX;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// The shift count uses the same ideal register as a vector of the given size.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  return vector_ideal_reg(size);
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+// Smallest vector is 8 bytes wide; the result is that width expressed in
+// elements of type bt.
+const int Matcher::min_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return 8/type2aelembytes(bt);
+}
+
+// ARM doesn't support misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return false;
+}
+
+// ARM doesn't support AES intrinsics
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// Reported as supported only in AARCH64 builds.
+const bool Matcher::convL2FSupported(void) {
+#ifdef AARCH64
+  return true;
+#else
+  return false;
+#endif
+}
+
+// Is this branch offset short enough that a short branch can be used?
+//
+// NOTE: If the platform does not provide any short branch variants, then
+//       this method should return false for offset 0.
+//
+// Short branches are currently disabled here: the range check below is
+// commented out and the method unconditionally returns false.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to address of the branch.
+  // On ARM a branch displacement is calculated relative to address
+  // of the branch + 8.
+  //
+  // offset -= 8;
+  // return (Assembler::is_simm24(offset));
+  return false;
+}
+
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
+  // Only the constant 0 qualifies, and only in AARCH64 builds.
+#ifdef AARCH64
+  return (value == 0);
+#else
+  return false;
+#endif
+}
+
+// No scaling for the parameter of the ClearArray node.
+const bool Matcher::init_array_count_is_in_bytes = true;
+
+#ifdef AARCH64
+const int Matcher::long_cmove_cost() { return 1; }
+#else
+// Needs 2 CMOV's for longs.
+const int Matcher::long_cmove_cost() { return 2; }
+#endif
+
+#ifdef AARCH64
+const int Matcher::float_cmove_cost() { return 1; }
+#else
+// CMOVF/CMOVD are expensive on ARM.
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+#endif
+
+// Does the CPU require late expand (see block.cpp for description of late expand)?
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+// FIXME: does this handle vector shifts as well?
+#ifdef AARCH64
+const bool Matcher::need_masked_shift_count = false;
+#else
+const bool Matcher::need_masked_shift_count = true;
+#endif
+
+const bool Matcher::convi2l_type_required = true;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  // Delegates entirely to the base-plus-offset cloning helper.
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+// Intentionally empty: the address node is left untouched.
+void Compile::reshape_address(AddPNode* addp) {
+}
+
+// Must not be called in non-LP64 builds (NOT_LP64 traps with
+// ShouldNotCallThis) and asserts UseCompressedOops; always answers false.
+bool Matcher::narrow_oop_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedOops, "only for compressed oops code");
+  return false;
+}
+
+// Must not be called in non-LP64 builds (NOT_LP64 traps with
+// ShouldNotCallThis) and asserts UseCompressedClassPointers; always answers
+// false.
+bool Matcher::narrow_klass_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedClassPointers, "only for compressed klass code");
+  return false;
+}
+
+// Must not be called in non-LP64 builds; always answers true.
+bool Matcher::const_oop_prefer_decode() {
+  NOT_LP64(ShouldNotCallThis());
+  return true;
+}
+
+// Must not be called in non-LP64 builds; always answers true.
+bool Matcher::const_klass_prefer_decode() {
+  NOT_LP64(ShouldNotCallThis());
+  return true;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Intel can load a float constant from a direct address, requiring no
+// extra registers.  Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no fixup is
+// needed.  Else we split the double into 2 integer pieces and move it
+// piece-by-piece.  Only happens when passing doubles into C code as the
+// Java calling convention forces doubles to be aligned.
+#ifdef AARCH64
+// On stack replacement support:
+// We don't need Load[DL]_unaligned support, because interpreter stack
+// has correct alignment
+const bool Matcher::misaligned_doubles_ok = true;
+#else
+const bool Matcher::misaligned_doubles_ok = false;
+#endif
+
+// No-op on ARM.
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+}
+
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during deoptimization?
+// ARM does not handle callee-save floats.
+bool Matcher::float_in_double() {
+  return false;
+}
+
+// Do ints take an entire long register or just half?
+// Note that we if-def off of _LP64.
+// The relevant question is how the int is callee-saved.  In _LP64
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits, in not-_LP64 only the low 32 bits is written.
+#ifdef _LP64
+const bool Matcher::int_in_long = true;
+#else
+const bool Matcher::int_in_long = false;
+#endif
+
+// Return whether or not this register is ever used as an argument.  This
+// function is used on startup to build the trampoline stubs in generateOptoStub.
+// Registers not mentioned will be killed by the VM call in the trampoline, and
+// arguments in those registers will not be available to the callee.
+//
+// AARCH64: register numbers from R_R0_num up to (but excluding) R_R8_num, and
+// numbers in [R_V0_num, R_V7b_num] whose low two bits are 0 or 1.
+// Otherwise: R_R0_num..R_R3_num and the range [R_S0_num, R_S13_num].
+bool Matcher::can_be_java_arg( int reg ) {
+#ifdef AARCH64
+  if (reg >= R_R0_num && reg < R_R8_num) return true;
+  if (reg >= R_V0_num && reg <= R_V7b_num && ((reg & 3) < 2)) return true;
+#else
+  if (reg == R_R0_num ||
+      reg == R_R1_num ||
+      reg == R_R2_num ||
+      reg == R_R3_num) return true;
+
+  if (reg >= R_S0_num &&
+      reg <= R_S13_num) return true;
+#endif
+  return false;
+}
+
+// The spillable argument registers are exactly the Java argument registers.
+bool Matcher::is_spillable_arg( int reg ) {
+  return can_be_java_arg(reg);
+}
+
+// Always false, whatever the divisor.
+bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
+  return false;
+}
+
+// Register for DIVI projection of divmodI
+// Not expected to be reached (ShouldNotReachHere); the empty mask only
+// satisfies the return type.
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODI projection of divmodI
+// Not expected to be reached (ShouldNotReachHere).
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for DIVL projection of divmodL
+// Not expected to be reached (ShouldNotReachHere).
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODL projection of divmodL
+// Not expected to be reached (ShouldNotReachHere).
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Mask of the register used to save SP around method handle invokes.
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return FP_REGP_mask();
+}
+
+// True when MacroAssembler::_reachable_from_cache() does not vouch for the
+// call's entry point.
+bool maybe_far_call(const CallNode *n) {
+  return !MacroAssembler::_reachable_from_cache(n->as_Call()->entry_point());
+}
+
+// Same check for a machine call node.
+bool maybe_far_call(const MachCallNode *n) {
+  return !MacroAssembler::_reachable_from_cache(n->as_MachCall()->entry_point());
+}
+
+%}
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams.  Encoding classes are parameterized macros used by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction.  Operands specify their base encoding interface with the
+// interface keyword.  There are currently four supported interfaces,
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried.  CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried.  MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried.  COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding.  Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.
Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+  // Emits nothing after a call.
+  enc_class call_epilog %{
+    // nothing
+  %}
+
+  enc_class Java_To_Runtime (method meth) %{
+    // CALL directly to the runtime
+    emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+  %}
+
+  // Without a _method the call is emitted as a plain runtime call; otherwise
+  // it carries an opt-virtual or static call relocation (chosen by
+  // _optimized_virtual) and a to-interpreter stub is emitted as well.
+  enc_class Java_Static_Call (method meth) %{
+    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
+    // who we intended to call.
+
+    if ( !_method) {
+      emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec());
+    } else {
+      int method_index = resolved_method_index(cbuf);
+      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
+                                                  : static_call_Relocation::spec(method_index);
+      emit_call_reloc(cbuf, as_MachCall(), $meth, rspec);
+
+      // Emit stubs for static call.
+      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+      if (stub == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    }
+  %}
+
+  // Stores the address mark + ret_addr_offset() into the thread's
+  // last_Java_pc slot, using LR as scratch.  Exactly two instructions are
+  // expected (asserted), and the insts mark is put back afterwards.
+  enc_class save_last_PC %{
+    // preserve mark
+    address mark = cbuf.insts()->mark();
+    debug_only(int off0 = cbuf.insts_size());
+    MacroAssembler _masm(&cbuf);
+    int ret_addr_offset = as_MachCall()->ret_addr_offset();
+    __ adr(LR, mark + ret_addr_offset);
+    __ str(LR, Address(Rthread, JavaThread::last_Java_pc_offset()));
+    debug_only(int off1 = cbuf.insts_size());
+    assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction");
+    // restore mark
+    cbuf.insts()->set_mark(mark);
+  %}
+
+  // Copies SP into Rmh_SP_save.  Exactly 4 bytes of code are expected
+  // (asserted), and the insts mark is put back afterwards.
+  enc_class preserve_SP %{
+    // preserve mark
+    address mark = cbuf.insts()->mark();
+    debug_only(int off0 = cbuf.insts_size());
+    MacroAssembler _masm(&cbuf);
+    // FP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP in places where the callee might change the SP.
+    __ mov(Rmh_SP_save, SP);
+    debug_only(int off1 = cbuf.insts_size());
+    assert(off1 - off0 == 4, "correct size prediction");
+    // restore mark
+    cbuf.insts()->set_mark(mark);
+  %}
+
+  // Inverse of preserve_SP: copies Rmh_SP_save back into SP.
+  enc_class restore_SP %{
+    MacroAssembler _masm(&cbuf);
+    __ mov(SP, Rmh_SP_save);
+  %}
+
+  // Loads Universe::non_oop_word() into the inline cache register, then emits
+  // the call under a virtual_call relocation that refers back to the
+  // instruction mark set before the load.
+  enc_class Java_Dynamic_Call (method meth) %{
+    MacroAssembler _masm(&cbuf);
+    Register R8_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
+    assert(R8_ic_reg == Ricklass, "should be");
+    __ set_inst_mark();
+#ifdef AARCH64
+// TODO: see C1 LIR_Assembler::ic_call()
+    InlinedAddress oop_literal((address)Universe::non_oop_word());
+    int offset = __ offset();
+    int fixed_size = mov_oop_size * 4;
+    if (VM_Version::prefer_moves_over_load_literal()) {
+      // Build the 64-bit value 16 bits at a time.
+      uintptr_t val = (uintptr_t)Universe::non_oop_word();
+      __ movz(R8_ic_reg, (val >>  0) & 0xffff,  0);
+      __ movk(R8_ic_reg, (val >> 16) & 0xffff, 16);
+      __ movk(R8_ic_reg, (val >> 32) & 0xffff, 32);
+      __ movk(R8_ic_reg, (val >> 48) & 0xffff, 48);
+    } else {
+      __ ldr_literal(R8_ic_reg, oop_literal);
+    }
+    // Either variant must occupy exactly fixed_size bytes.
+    assert(__ offset() - offset == fixed_size, "bad mov_oop size");
+#else
+    // Low half via movw, high half via movt.
+    __ movw(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff);
+    __ movt(R8_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16);
+#endif
+    address virtual_call_oop_addr = __ inst_mark();
+    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
+    // who we intended to call.
+    int method_index = resolved_method_index(cbuf);
+    __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
+    emit_call_reloc(cbuf, as_MachCall(), $meth, RelocationHolder::none);
+#ifdef AARCH64
+    if (!VM_Version::prefer_moves_over_load_literal()) {
+      // The literal referenced by ldr_literal above is bound here, after the
+      // call, and is branched over.
+      Label skip_literal;
+      __ b(skip_literal);
+      int off2 = __ offset();
+      __ bind_literal(oop_literal);
+      if (__ offset() - off2 == wordSize) {
+        // no padding, so insert nop for worst-case sizing
+        __ nop();
+      }
+      __ bind(skip_literal);
+    }
+#endif
+  %}
+
+  // Builds a 32-bit pattern from "cnt" copies of the low "wth" bytes of src
+  // (cnt * wth must be 4), loads it into tmp and copies tmp into both halves
+  // of dst with fmdrr.
+  enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{
+    // FIXME: load from constant table?
+    // Load a constant replicated "count" times with width "width"
+    int count = $cnt$$constant;
+    int width = $wth$$constant;
+    assert(count*width == 4, "sanity");
+    int val = $src$$constant;
+    if (width < 4) {
+      int bit_width = width * 8;
+      val &= (((int)1) << bit_width) - 1; // mask off sign bits
+      // Each pass ORs in a copy shifted by one element width.
+      for (int i = 0; i < count - 1; i++) {
+        val |= (val << bit_width);
+      }
+    }
+    MacroAssembler _masm(&cbuf);
+
+    // All-ones and zero take a single instruction; anything else is built
+    // from a movw/movt pair.
+    if (val == -1) {
+      __ mvn($tmp$$Register, 0);
+    } else if (val == 0) {
+      __ mov($tmp$$Register, 0);
+    } else {
+      __ movw($tmp$$Register, val & 0xffff);
+      __ movt($tmp$$Register, (unsigned int)val >> 16);
+    }
+    __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
+  %}
+
+  enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{
+    // Replicate float con 2 times and pack into vector (8 bytes) in regD.
+    float fval = $src$$constant;
+    // NOTE(review): the float's bit pattern is read through an int* cast;
+    // a bit-copy helper would avoid the aliasing cast -- confirm what the
+    // code base offers before changing.
+    int val = *((int*)&fval);
+    MacroAssembler _masm(&cbuf);
+
+    // Same materialization scheme as in LdReplImmI.
+    if (val == -1) {
+      __ mvn($tmp$$Register, 0);
+    } else if (val == 0) {
+      __ mov($tmp$$Register, 0);
+    } else {
+      __ movw($tmp$$Register, val & 0xffff);
+      __ movt($tmp$$Register, (unsigned int)val >> 16);
+    }
+    __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register);
+  %}
+
+  // Compares two char sequences.  result receives the difference of the
+  // first mismatching chars, or the length difference (cnt1 - cnt2) when the
+  // sequences agree up to the shorter length.  str1, str2, cnt1, cnt2, tmp1
+  // and tmp2 are all overwritten.
+  enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, iRegI tmp1, iRegI tmp2) %{
+    Label Ldone, Lloop;
+    MacroAssembler _masm(&cbuf);
+
+    Register str1_reg = $str1$$Register;
+    Register str2_reg = $str2$$Register;
+    Register cnt1_reg = $cnt1$$Register; // int
+    Register cnt2_reg = $cnt2$$Register; // int
+    Register tmp1_reg = $tmp1$$Register;
+    Register tmp2_reg = $tmp2$$Register;
+    Register result_reg = $result$$Register;
+
+    // result_reg is deliberately absent from this list: the code below copes
+    // with result_reg being the same register as tmp1_reg or tmp2_reg.
+    assert_different_registers(str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp1_reg, tmp2_reg);
+
+    // Compute the minimum of the string lengths (kept in cnt1_reg) and the
+    // difference of the string lengths (kept in tmp2_reg)
+
+    // See if the lengths are different, and calculate min in cnt1_reg.
+    // Stash diff in tmp2 in case we need it for a tie-breaker.
+    __ subs_32(tmp2_reg, cnt1_reg, cnt2_reg);
+#ifdef AARCH64
+    Label Lskip;
+    __ _lsl_w(cnt1_reg, cnt1_reg, exact_log2(sizeof(jchar))); // scale the limit
+    __ b(Lskip, mi);
+    __ _lsl_w(cnt1_reg, cnt2_reg, exact_log2(sizeof(jchar))); // scale the limit
+    __ bind(Lskip);
+#else
+    __ mov(cnt1_reg, AsmOperand(cnt1_reg, lsl, exact_log2(sizeof(jchar)))); // scale the limit
+    __ mov(cnt1_reg, AsmOperand(cnt2_reg, lsl, exact_log2(sizeof(jchar))), pl); // scale the limit
+#endif
+
+    // reallocate cnt1_reg, cnt2_reg, result_reg
+    // Note: limit_reg holds the string length pre-scaled by 2
+    Register limit_reg = cnt1_reg;
+    Register chr2_reg = cnt2_reg;
+    Register chr1_reg = tmp1_reg;
+    // str{12} are the base pointers
+
+    // Is the minimum length zero?
+    __ cmp_32(limit_reg, 0);
+    if (result_reg != tmp2_reg) {
+      __ mov(result_reg, tmp2_reg, eq);
+    }
+    __ b(Ldone, eq);
+
+    // Load first characters
+    __ ldrh(chr1_reg, Address(str1_reg, 0));
+    __ ldrh(chr2_reg, Address(str2_reg, 0));
+
+    // Compare first characters
+    __ subs(chr1_reg, chr1_reg, chr2_reg);
+    if (result_reg != chr1_reg) {
+      __ mov(result_reg, chr1_reg, ne);
+    }
+    __ b(Ldone, ne);
+
+    {
+      // Check after comparing first character to see if strings are equivalent
+      // Check if the strings start at same location
+      __ cmp(str1_reg, str2_reg);
+      // Check if the length difference is zero
+      __ cond_cmp(tmp2_reg, 0, eq);
+      __ mov(result_reg, 0, eq); // result is zero
+      __ b(Ldone, eq);
+      // Strings might not be equal
+    }
+
+    // chr1_reg = limit - 2; zero means only one char was to be compared.
+    __ subs(chr1_reg, limit_reg, 1 * sizeof(jchar));
+    if (result_reg != tmp2_reg) {
+      __ mov(result_reg, tmp2_reg, eq);
+    }
+    __ b(Ldone, eq);
+
+    // Shift str1_reg and str2_reg to the end of the arrays, negate limit
+    __ add(str1_reg, str1_reg, limit_reg);
+    __ add(str2_reg, str2_reg, limit_reg);
+    __ neg(limit_reg, chr1_reg); // limit = -(limit-2)
+
+    // Compare the rest of the characters
+    __ bind(Lloop);
+    __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
+    __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
+    __ subs(chr1_reg, chr1_reg, chr2_reg);
+    if (result_reg != chr1_reg) {
+      __ mov(result_reg, chr1_reg, ne);
+    }
+    __ b(Ldone, ne);
+
+    // The negative index counts up towards zero; zero ends the loop.
+    __ adds(limit_reg, limit_reg, sizeof(jchar));
+    __ b(Lloop, ne);
+
+    // If strings are equal up to min length, return the length difference.
+    if (result_reg != tmp2_reg) {
+      __ mov(result_reg, tmp2_reg);
+    }
+
+    // Otherwise, return the difference between the first mismatched chars.
+    __ bind(Ldone);
+  %}
+
+  // Sets result to 1 when the two char sequences of length cnt are equal
+  // (including the same-pointer and zero-length cases).  str1, str2, cnt,
+  // tmp1 and tmp2 are overwritten.
+  // NOTE(review): Lword_loop and Lpost_word are declared but not referenced
+  // in this encoding.
+  enc_class enc_String_Equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2) %{
+    Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone, Lequal;
+    MacroAssembler _masm(&cbuf);
+
+    Register str1_reg = $str1$$Register;
+    Register str2_reg = $str2$$Register;
+    Register cnt_reg = $cnt$$Register; // int
+    Register tmp1_reg = $tmp1$$Register;
+    Register tmp2_reg = $tmp2$$Register;
+    Register result_reg = $result$$Register;
+
+    assert_different_registers(str1_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, result_reg);
+
+    __ cmp(str1_reg, str2_reg); //same char[] ?
+    __ b(Lequal, eq);
+
+    __ cbz_32(cnt_reg, Lequal); // count == 0
+
+    //rename registers
+    Register limit_reg = cnt_reg;
+    Register chr1_reg = tmp1_reg;
+    Register chr2_reg = tmp2_reg;
+
+    // Convert the char count into a byte count.
+    __ logical_shift_left(limit_reg, limit_reg, exact_log2(sizeof(jchar)));
+
+    //check for alignment and position the pointers to the ends
+    __ orr(chr1_reg, str1_reg, str2_reg);
+    __ tst(chr1_reg, 0x3);
+
+    // notZero means at least one not 4-byte aligned.
+    // We could optimize the case when both arrays are not aligned
+    // but it is not frequent case and it requires additional checks.
+    __ b(Lchar, ne);
+
+    // Compare char[] arrays aligned to 4 bytes.
+    // Presumably char_arrays_equals sets result_reg and branches to Ldone on
+    // a mismatch -- confirm against the MacroAssembler implementation.
+    __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
+                          chr1_reg, chr2_reg, Ldone);
+
+    __ b(Lequal); // equal
+
+    // char by char compare
+    __ bind(Lchar);
+    __ mov(result_reg, 0);
+    __ add(str1_reg, limit_reg, str1_reg);
+    __ add(str2_reg, limit_reg, str2_reg);
+    __ neg(limit_reg, limit_reg); //negate count
+
+    // Lchar_loop
+    __ bind(Lchar_loop);
+    __ ldrh(chr1_reg, Address(str1_reg, limit_reg));
+    __ ldrh(chr2_reg, Address(str2_reg, limit_reg));
+    __ cmp(chr1_reg, chr2_reg);
+    __ b(Ldone, ne);
+    __ adds(limit_reg, limit_reg, sizeof(jchar));
+    __ b(Lchar_loop, ne);
+
+    __ bind(Lequal);
+    __ mov(result_reg, 1); //equal
+
+    __ bind(Ldone);
+  %}
+
+  // Sets result to 1 when ary1 and ary2 are the same array, or are both
+  // non-null with equal lengths and (per char_arrays_equals) equal contents;
+  // the explicit null and length-mismatch exits leave result at 0.
+  // ary1, ary2, tmp1, tmp2 and tmp3 are overwritten.
+  // NOTE(review): Lvector and Lloop are declared but not referenced in this
+  // encoding.
+  enc_class enc_Array_Equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result) %{
+    Label Lvector, Ldone, Lloop, Lequal;
+    MacroAssembler _masm(&cbuf);
+
+    Register ary1_reg = $ary1$$Register;
+    Register ary2_reg = $ary2$$Register;
+    Register tmp1_reg = $tmp1$$Register;
+    Register tmp2_reg = $tmp2$$Register;
+    Register tmp3_reg = $tmp3$$Register;
+    Register result_reg = $result$$Register;
+
+    assert_different_registers(ary1_reg, ary2_reg, tmp1_reg, tmp2_reg, tmp3_reg, result_reg);
+
+    int length_offset = arrayOopDesc::length_offset_in_bytes();
+    int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+    // return true if the same array
+#ifdef AARCH64
+    __ cmp(ary1_reg, ary2_reg);
+    __ b(Lequal, eq);
+
+    // From here on result_reg is 0 until Lequal is reached.
+    __ mov(result_reg, 0);
+
+    __ cbz(ary1_reg, Ldone); // not equal
+
+    __ cbz(ary2_reg, Ldone); // not equal
+#else
+    __ teq(ary1_reg, ary2_reg);
+    __ mov(result_reg, 1, eq);
+    __ b(Ldone, eq); // equal
+
+    __ tst(ary1_reg, ary1_reg);
+    __ mov(result_reg, 0, eq);
+    __ b(Ldone, eq); // not equal
+
+    __ tst(ary2_reg, ary2_reg);
+    __ mov(result_reg, 0, eq);
+    __ b(Ldone, eq); // not equal
+#endif
+
+    //load the lengths of arrays
+    __ ldr_s32(tmp1_reg, Address(ary1_reg, length_offset)); // int
+    __ ldr_s32(tmp2_reg, Address(ary2_reg, length_offset)); // int
+
+    // return false if the two arrays are not equal length
+#ifdef AARCH64
+    __ cmp_w(tmp1_reg, tmp2_reg);
+    __ b(Ldone, ne); // not equal
+
+    __ cbz_w(tmp1_reg, Lequal); // zero-length arrays are equal
+#else
+    __ teq_32(tmp1_reg, tmp2_reg);
+    __ mov(result_reg, 0, ne);
+    __ b(Ldone, ne); // not equal
+
+    __ tst(tmp1_reg, tmp1_reg);
+    __ mov(result_reg, 1, eq);
+    __ b(Ldone, eq); // zero-length arrays are equal
+#endif
+
+    // load array addresses
+    __ add(ary1_reg, ary1_reg, base_offset);
+    __ add(ary2_reg, ary2_reg, base_offset);
+
+    // renaming registers
+    Register chr1_reg = tmp3_reg; // for characters in ary1
+    Register chr2_reg = tmp2_reg; // for characters in ary2
+    Register limit_reg = tmp1_reg; // length
+
+    // set byte count
+    __ logical_shift_left_32(limit_reg, limit_reg, exact_log2(sizeof(jchar)));
+
+    // Compare char[] arrays aligned to 4 bytes.
+    // Presumably char_arrays_equals sets result_reg and branches to Ldone on
+    // a mismatch -- confirm against the MacroAssembler implementation.
+    __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
+                          chr1_reg, chr2_reg, Ldone);
+    __ bind(Lequal);
+    __ mov(result_reg, 1); //equal
+
+    __ bind(Ldone);
+  %}
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+// S T A C K L A Y O U T Allocators stack-slot number
+// | (to get allocators register number
+// G Owned by | | v add VMRegImpl::stack0)
+// r CALLER | |
+// o | +--------+ pad to even-align allocators stack-slot
+// w V | pad0 | numbers; owned by CALLER
+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
+// h ^ | in | 5
+// | | args | 4 Holes in incoming args owned by SELF
+// | | | | 3
+// | | +--------+
+// V | | old out| Empty on Intel, window on Sparc
+// | old |preserve| Must be even aligned.
+// | SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
+// | | in | 3 area for Intel ret address
+// Owned by |preserve| Empty on Sparc.
+// SELF +--------+ +// | | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by +--------+ +// CALLEE | new out| 6 Empty on Intel, window on Sparc +// | new |preserve| Must be even-aligned. +// | SP-+--------+----> Matcher::_new_SP, even aligned +// | | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be necessary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be necessary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. + +frame %{ + // What direction does stack grow in (assumed to be same for native & Java) + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. 
+ inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C + interpreter_method_oop_reg(R_Rmethod); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + // Number of stack slots consumed by a Monitor enter + sync_stack_slots(1 * VMRegImpl::slots_per_word); + + // Compiled code's Frame Pointer +#ifdef AARCH64 + frame_pointer(R_SP); +#else + frame_pointer(R_R13); +#endif + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); + // LP64: Alignment size in bytes (128-bit -> 16 bytes) + // !LP64: Alignment size in bytes (64-bit -> 8 bytes) + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + // FP + LR + in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + // ADLC doesn't support parsing expressions, so I folded the math by hand. + varargs_C_out_slots_killed( 0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
+ // Otherwise, it is above the locks and verification slot and alignment word + return_addr(STACK - 1*VMRegImpl::slots_per_word + + round_to((Compile::current()->in_preserve_stack_slots() + + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for calling + // java + calling_convention %{ + (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing); + + %} + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for calling + // C. + c_calling_convention %{ + // This is obviously always outgoing + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + // Location of compiled Java return values. Same as C + return_value %{ + return c2::return_value(ideal_reg); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. 
+ +//----------Simple Operands---------------------------------------------------- +// Immediate Operands +// Integer Immediate: 32-bit +operand immI() %{ + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 8-bit unsigned - for VMOV +operand immU8() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 255)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 16-bit unsigned (upper 16 bits zero), only when VM_Version::supports_movw() +operand immI16() %{ + predicate((n->get_int() >> 16) == 0 && VM_Version::supports_movw()); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +#ifndef AARCH64 +// Integer Immediate: offset for half and double word loads and stores +operand immIHD() %{ + predicate(is_memoryHD(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: offset for fp loads and stores +operand immIFP() %{ + predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} +#endif + +// Valid scale values for addressing modes and shifts +operand immU5() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 31)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 6-bit unsigned with the top bit set, i.e. the values 32-63 +operand immU6Big() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 0 +operand immI0() %{ + predicate(n->get_int() == 0); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 1 +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 2 +operand immI_2() %{ + predicate(n->get_int() == 2); + match(ConI); + 
op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 3 +operand immI_3() %{ + predicate(n->get_int() == 3); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 4 +operand immI_4() %{ + predicate(n->get_int() == 4); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 8 +operand immI_8() %{ + predicate(n->get_int() == 8); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Int Immediate non-negative +operand immU31() +%{ + predicate(n->get_int() >= 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the values 32-63 +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Immediates for special shifts (sign extend) + +// Integer Immediate: the value 16 +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 24 +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 255 +operand immI_255() %{ + predicate( n->get_int() == 255 ); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 65535 +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediates for arithmetic instructions + +operand aimmI() %{ + predicate(is_aimm(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand aimmIneg() %{ + predicate(is_aimm(-n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand 
aimmU31() %{ + predicate((0 <= n->get_int()) && is_aimm(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediates for logical instructions + +operand limmI() %{ + predicate(is_limmI(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmIlow8() %{ + predicate(is_limmI_low(n->get_int(), 8)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmU31() %{ + predicate(0 <= n->get_int() && is_limmI(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmIn() %{ + predicate(is_limmI(~n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +#ifdef AARCH64 +// Long Immediate: for logical instruction +operand limmL() %{ + predicate(is_limmL(n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmLn() %{ + predicate(is_limmL(~n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: for arithmetic instruction +operand aimmL() %{ + predicate(is_aimm(n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand aimmLneg() %{ + predicate(is_aimm(-n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} +#endif // AARCH64 + +// Long Immediate: the value FF +operand immL_FF() %{ + predicate( n->get_long() == 0xFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value FFFF +operand immL_FFFF() %{ + predicate( n->get_long() == 0xFFFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 32 or 64-bit +operand immP() %{ + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + 
interface(CONST_INTER); +%} + +operand immP0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Narrow Pointer Immediate +operand immN() +%{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() +%{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Narrow NULL Pointer Immediate +operand immN0() +%{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL() %{ + match(ConL); + op_cost(40); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immL0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 16-bit unsigned (0 .. 65535), only when VM_Version::supports_movw() +operand immL16() %{ + predicate(n->get_long() >= 0 && n->get_long() < (1<<16) && VM_Version::supports_movw()); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate +operand immD() %{ + match(ConD); + + op_cost(40); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate: +0.0d. 
+operand immD0() %{ + predicate(jlong_cast(n->getd()) == 0); + + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand imm8D() %{ + predicate(Assembler::double_num(n->getd()).can_be_imm8()); + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: +0.0f +operand immF0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: encoded as 8 bits +operand imm8F() %{ + predicate(Assembler::float_num(n->getf()).can_be_imm8()); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Register Operands +// Integer Register +operand iRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + match(R0RegI); + match(R1RegI); + match(R2RegI); + match(R3RegI); +#ifdef AARCH64 + match(ZRRegI); +#else + match(R12RegI); +#endif + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand iRegP() %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + match(R0RegP); + match(R1RegP); + match(R2RegP); + match(RExceptionRegP); + match(R8RegP); + match(R9RegP); + match(RthreadRegP); // FIXME: move to sp_ptr_RegP? + match(R12RegP); + match(LRRegP); + + match(sp_ptr_RegP); + match(store_ptr_RegP); + + format %{ %} + interface(REG_INTER); +%} + +// GPRs + Rthread + SP +operand sp_ptr_RegP() %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(RegP); + match(iRegP); + match(SPRegP); // FIXME: check cost + + format %{ %} + interface(REG_INTER); +%} + +#ifdef AARCH64 +// Like sp_ptr_reg, but exclude regs (Aarch64 SP) that can't be +// stored directly. Includes ZR, so can't be used as a destination. 
+operand store_ptr_RegP() %{ + constraint(ALLOC_IN_RC(store_ptr_reg)); + match(RegP); + match(iRegP); + match(ZRRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand store_RegI() %{ + constraint(ALLOC_IN_RC(store_reg)); + match(RegI); + match(iRegI); + match(ZRRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand store_RegL() %{ + constraint(ALLOC_IN_RC(store_ptr_reg)); + match(RegL); + match(iRegL); + match(ZRRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand store_RegN() %{ + constraint(ALLOC_IN_RC(store_reg)); + match(RegN); + match(iRegN); + match(ZRRegN); + + format %{ %} + interface(REG_INTER); +%} +#endif + +operand R0RegP() %{ + constraint(ALLOC_IN_RC(R0_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R1RegP() %{ + constraint(ALLOC_IN_RC(R1_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R2RegP() %{ + constraint(ALLOC_IN_RC(R2_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand RExceptionRegP() %{ + constraint(ALLOC_IN_RC(Rexception_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand RthreadRegP() %{ + constraint(ALLOC_IN_RC(Rthread_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand IPRegP() %{ + constraint(ALLOC_IN_RC(IP_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand LRRegP() %{ + constraint(ALLOC_IN_RC(LR_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R0RegI() %{ + constraint(ALLOC_IN_RC(R0_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R1RegI() %{ + constraint(ALLOC_IN_RC(R1_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R2RegI() %{ + constraint(ALLOC_IN_RC(R2_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R3RegI() %{ + constraint(ALLOC_IN_RC(R3_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + 
+#ifndef AARCH64 +operand R12RegI() %{ + constraint(ALLOC_IN_RC(R12_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} +#endif + +// Long Register +operand iRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); +#ifdef AARCH64 + match(iRegLd); +#else + match(R0R1RegL); + match(R2R3RegL); +#endif +//match(iRegLex); + + format %{ %} + interface(REG_INTER); +%} + +operand iRegLd() %{ + constraint(ALLOC_IN_RC(long_reg_align)); + match(iRegL); // FIXME: allows unaligned R11/R12? + + format %{ %} + interface(REG_INTER); +%} + +#ifndef AARCH64 +// first long arg, or return value +operand R0R1RegL() %{ + constraint(ALLOC_IN_RC(R0R1_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand R2R3RegL() %{ + constraint(ALLOC_IN_RC(R2R3_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} +#endif + +// Condition Code Flag Register +operand flagsReg() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr" %} + interface(REG_INTER); +%} + +// Result of compare to 0 (TST) +operand flagsReg_EQNELTGE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_EQNELTGE" %} + interface(REG_INTER); +%} + +// Condition Code Register, unsigned comparisons. +operand flagsRegU() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); +#ifdef TODO + match(RegFlagsP); +#endif + + format %{ "apsr_U" %} + interface(REG_INTER); +%} + +// Condition Code Register, pointer comparisons. +operand flagsRegP() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_P" %} + interface(REG_INTER); +%} + +// Condition Code Register, long comparisons. 
+#ifndef AARCH64 +operand flagsRegL_LTGE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_LTGE" %} + interface(REG_INTER); +%} + +operand flagsRegL_EQNE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_EQNE" %} + interface(REG_INTER); +%} + +operand flagsRegL_LEGT() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_LEGT" %} + interface(REG_INTER); +%} +#endif + +// Condition Code Register, floating comparisons, unordered same as "less". +operand flagsRegF() %{ + constraint(ALLOC_IN_RC(float_flags)); + match(RegFlags); + + format %{ "fpscr_F" %} + interface(REG_INTER); +%} + +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(actual_dflt_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand regD() %{ + constraint(ALLOC_IN_RC(actual_dflt_reg)); + match(RegD); + match(regD_low); + + format %{ %} + interface(REG_INTER); +%} + +operand regF() %{ + constraint(ALLOC_IN_RC(sflt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +operand regD_low() %{ + constraint(ALLOC_IN_RC(dflt_low_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +// Special Registers + +// Method Register +operand inline_cache_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(Ricklass_regP)); + match(reg); + format %{ %} + interface(REG_INTER); +%} + +operand interpreter_method_oop_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(Rmethod_regP)); + match(reg); + format %{ %} + interface(REG_INTER); +%} + + +//----------Complex Operands--------------------------------------------------- +// Indirect Memory Reference +operand indirect(sp_ptr_RegP reg) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(reg); + + op_cost(100); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no 
index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp(0x0); + %} +%} + +#ifdef AARCH64 +// Indirect with scaled*1 uimm12 offset +operand indOffsetU12ScaleB(sp_ptr_RegP reg, immUL12 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with scaled*2 uimm12 offset +operand indOffsetU12ScaleS(sp_ptr_RegP reg, immUL12x2 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with scaled*4 uimm12 offset +operand indOffsetU12ScaleI(sp_ptr_RegP reg, immUL12x4 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with scaled*8 uimm12 offset +operand indOffsetU12ScaleL(sp_ptr_RegP reg, immUL12x8 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with scaled*16 uimm12 offset +operand indOffsetU12ScaleQ(sp_ptr_RegP reg, immUL12x16 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + 
base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +#else // ! AARCH64 + +// Indirect with Offset in ]-4096, 4096[ +operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with offset for float load/store +operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset for half and double words +operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset and Offset+4 in ]-1024, 1024[ +operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset and Offset+4 in ]-4096, 4096[ +operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + 
format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); +#ifdef AARCH64 + index(0xff); // 0xff => no index +#else + index(0xf); // PC => no index +#endif + scale(0x0); + disp($offset); + %} +%} +#endif // !AARCH64 + +// Indirect with Register Index +operand indIndex(iRegP addr, iRegX index) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr index); + + op_cost(100); + format %{ "[$addr + $index]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +#ifdef AARCH64 +// Indirect Memory Times Scale Plus Index Register +operand indIndexScaleS(iRegP addr, iRegX index, immI_1 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus 32-bit Index Register +operand indIndexIScaleS(iRegP addr, iRegI index, immI_1 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX (ConvI2L index) scale)); + + op_cost(100); + format %{"[$addr + $index.w << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x7fffffff); // sxtw + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScaleI(iRegP addr, iRegX index, immI_2 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus 32-bit Index Register +operand indIndexIScaleI(iRegP addr, iRegI index, immI_2 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX (ConvI2L index) scale)); + + op_cost(100); + format %{"[$addr + $index.w << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + 
scale($scale); + disp(0x7fffffff); // sxtw + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScaleL(iRegP addr, iRegX index, immI_3 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus 32-bit Index Register +operand indIndexIScaleL(iRegP addr, iRegI index, immI_3 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX (ConvI2L index) scale)); + + op_cost(100); + format %{"[$addr + $index.w << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x7fffffff); // sxtw + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScaleQ(iRegP addr, iRegX index, immI_4 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus 32-bit Index Register +operand indIndexIScaleQ(iRegP addr, iRegI index, immI_4 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX (ConvI2L index) scale)); + + op_cost(100); + format %{"[$addr + $index.w << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x7fffffff); // sxtw + %} +%} +#else +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} +#endif + +// Operands for expressing Control Flow +// NOTE: Label is a 
predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC. + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xb); + greater_equal(0xa); + less_equal(0xd); + greater(0xc); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// integer comparison with 0, signed +operand cmpOp0() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0x4); + greater_equal(0x5); + less_equal(0xd); // unsupported + greater(0xc); // unsupported + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// Comparison Op, unsigned +operand cmpOpU() %{ + match(Bool); + + format %{ "u" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0x3); + greater_equal(0x2); + less_equal(0x9); + greater(0x8); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// Comparison Op, pointer (same as unsigned) +operand cmpOpP() %{ + match(Bool); + + format %{ "p" %} + interface(COND_INTER) %{ + equal(0x0); + 
not_equal(0x1); + less(0x3); + greater_equal(0x2); + less_equal(0x9); + greater(0x8); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +operand cmpOpL() %{ + match(Bool); + + format %{ "L" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xb); + greater_equal(0xa); + less_equal(0xd); + greater(0xc); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +operand cmpOpL_commute() %{ + match(Bool); + + format %{ "L" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xc); + greater_equal(0xd); + less_equal(0xa); + greater(0xb); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used to simplify +// instruction definitions by not requiring the AD writer to specify separate +// instructions for every form of operand when the instruction accepts +// multiple operand types with the same basic encoding and format. The classic +// case of this is memory operands. 
+#ifdef AARCH64 +opclass memoryB(indirect, indIndex, indOffsetU12ScaleB); +opclass memoryS(indirect, indIndex, indIndexScaleS, indIndexIScaleS, indOffsetU12ScaleS); +opclass memoryI(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI); +opclass memoryL(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); +opclass memoryP(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); +opclass memoryQ(indirect, indIndex, indIndexScaleQ, indIndexIScaleQ, indOffsetU12ScaleQ); +opclass memoryF(indirect, indIndex, indIndexScaleI, indIndexIScaleI, indOffsetU12ScaleI); +opclass memoryD(indirect, indIndex, indIndexScaleL, indIndexIScaleL, indOffsetU12ScaleL); + +opclass memoryScaledS(indIndexScaleS, indIndexIScaleS); +opclass memoryScaledI(indIndexScaleI, indIndexIScaleI); +opclass memoryScaledL(indIndexScaleL, indIndexIScaleL); +opclass memoryScaledP(indIndexScaleL, indIndexIScaleL); +opclass memoryScaledQ(indIndexScaleQ, indIndexIScaleQ); +opclass memoryScaledF(indIndexScaleI, indIndexIScaleI); +opclass memoryScaledD(indIndexScaleL, indIndexIScaleL); +// When ldrex/strex is used, only plain register-indirect addressing is accepted: +opclass memoryex ( indirect ); +opclass indIndexMemory( indIndex ); +opclass memoryvld ( indirect /* , write back mode not implemented */ ); + +#else + +opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale ); +opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale ); +opclass memoryF ( indirect, indOffsetFP ); +opclass memoryF2 ( indirect, indOffsetFPx2 ); +opclass memoryD ( indirect, indOffsetFP ); +opclass memoryfp( indirect, indOffsetFP ); +opclass memoryB ( indirect, indIndex, indOffsetHD ); +opclass memoryS ( indirect, indIndex, indOffsetHD ); +opclass memoryL ( indirect, indIndex, indOffsetHD ); + +opclass memoryScaledI(indIndexScale); +opclass memoryScaledP(indIndexScale); + +// When ldrex/strex is used, only plain register-indirect addressing is accepted: +opclass memoryex ( indirect ); +opclass indIndexMemory( indIndex ); +opclass memorylong ( indirect, 
indOffset12x2 ); +opclass memoryvld ( indirect /* , write back mode not implemented */ ); +#endif + +//----------PIPELINE----------------------------------------------------------- +pipeline %{ + +//----------ATTRIBUTES--------------------------------------------------------- +attributes %{ + fixed_size_instructions; // Fixed size instructions + max_instructions_per_bundle = 4; // Up to 4 instructions per bundle + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions; each is defined in the INSTRUCTIONS section with its own pipe_class + nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR ); +%} + +//----------RESOURCES---------------------------------------------------------- +// Resources are the functional units available to the machine; IALU is declared as A0 | A1 +resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1); + +//----------PIPELINE DESCRIPTION----------------------------------------------- +// Pipeline Description specifies the stages in the machine's pipeline + +pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D); + +//----------PIPELINE CLASSES--------------------------------------------------- +// Pipeline Classes describe the stages in which input and output are +// referenced by the hardware pipeline. 
+ +// Integer ALU reg-reg operation +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg long operation (two instructions) +pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{ + instruction_count(2); + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; + IALU : R; +%} + +// Integer ALU reg-reg long dependent operation (also writes the condition codes) +pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + src1 : R(read); + src2 : R(read); + cr : E(write); + IALU : R(2); +%} + +// Integer ALU reg-imm operation +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) %{ + single_instruction; + dst : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code +pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + single_instruction; + dst : E(write); + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation +pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{ + single_instruction; + dst : E(write); + src2 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation with condition code only +pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{ + single_instruction; + cr : E(write); + src : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code only +pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm operation with condition code only +pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg-zero operation with condition code only +pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, 
immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm-zero operation with condition code only +pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code, src1 modified +pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{ + multiple_bundles; + dst : E(write)+4; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R(3); + BR : R(2); +%} + +// Integer ALU operation with no register inputs (writes dst only) +pipe_class ialu_none(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg operation (declared may_have_no_code) +pipe_class ialu_reg(iRegI dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Integer ALU reg conditional operation +// This instruction has a 1 cycle stall, and cannot execute +// in the same cycle as the instruction setting the condition +// code. We kludge this by pretending to read the condition code +// 1 cycle earlier, and by marking the functional units as busy +// for 2 cycles with the result available 1 cycle later than +// is really the case. 
+pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{ + single_instruction; + op2_out : C(write); + op1 : R(read); + cr : R(read); // This is really E, with a 1 cycle stall + BR : R(2); + MS : R(2); +%} + +// Integer ALU reg moves between long and int registers (declared may_have_no_code) +pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} +pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Two integer ALU reg operations (one on A0, one on A1) +pipe_class ialu_reg_2(iRegL dst, iRegL src) %{ + instruction_count(2); + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Two integer ALU reg operations, long-to-long move (declared may_have_no_code) +pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{ + instruction_count(2); may_have_no_code; + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Integer ALU imm operation +pipe_class ialu_imm(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +pipe_class ialu_imm_n(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg-reg with carry operation +pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU cc operation +pipe_class ialu_cc(iRegI dst, flagsReg cc) %{ + single_instruction; + dst : E(write); + cc : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation (one source register) +pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + src : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation (two source registers) +pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + p : R(read); + q : R(read); + IALU : R; +%} + +// Integer ALU hi-lo-reg operation +pipe_class ialu_hi_lo_reg(iRegI dst, 
immI src) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + IALU : R(2); +%} + +// Long Constant +pipe_class loadConL( iRegL dst, immL src ) %{ + instruction_count(2); multiple_bundles; + dst : E(write)+1; + IALU : R(2); + IALU : R(2); +%} + +// Pointer Constant +pipe_class loadConP( iRegP dst, immP src ) %{ + instruction_count(0); multiple_bundles; + fixed_latency(6); +%} + +// Polling Address +pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{ + dst : E(write); + IALU : R; +%} + +// Long Constant small +pipe_class loadConLlo( iRegL dst, immL src ) %{ + instruction_count(2); + dst : E(write); + IALU : R; + IALU : R; +%} + +// [PHH] This is wrong for 64-bit. See LdImmF/D. +pipe_class loadConFD(regF dst, immF src, iRegP tmp) %{ + instruction_count(1); multiple_bundles; + src : R(read); + dst : M(write)+1; + IALU : R; + MS : E; +%} + +// Integer ALU nop operation (either ALU) +pipe_class ialu_nop() %{ + single_instruction; + IALU : R; +%} + +// Integer ALU nop operation on A0 +pipe_class ialu_nop_A0() %{ + single_instruction; + A0 : R; +%} + +// Integer ALU nop operation on A1 +pipe_class ialu_nop_A1() %{ + single_instruction; + A1 : R; +%} + +// Integer Multiply reg-reg operation +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + MS : R(5); +%} + +pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + single_instruction; + dst : E(write)+4; + src1 : R(read); + src2 : R(read); + MS : R(6); +%} + +// Integer Divide reg-reg +pipe_class sdiv_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + temp : E(write); + src1 : R(read); + src2 : R(read); + temp : R(read); + MS : R(38); +%} + +// Long Divide +pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + dst : E(write)+71; + src1 : R(read); + src2 : R(read)+1; + MS : R(70); +%} + +// Floating Point Add Float +pipe_class faddF_reg_reg(regF 
dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Add Double +pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Conditional Move (float) based on integer flags +pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Conditional Move (double) based on integer flags +pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Multiply Float +pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Multiply Double +pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Divide Float +pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(14); +%} + +// Floating Point Divide Double +pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(17); +%} + +// Floating Point Move/Negate/Abs Float +pipe_class faddF_reg(regF dst, regF src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R(1); +%} + +// Floating Point Move/Negate/Abs Double +pipe_class faddD_reg(regD dst, regD src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert F->D +pipe_class fcvtF2D(regD dst, regF src) %{ + 
single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->D +pipe_class fcvtI2D(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert LHi->D +pipe_class fcvtLHi2D(regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->D +pipe_class fcvtL2D(regD dst, iRegL src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->F +pipe_class fcvtL2F(regF dst, iRegL src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->F +pipe_class fcvtD2F(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->L +pipe_class fcvtI2L(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->I +pipe_class fcvtD2I(iRegI dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert D->L +pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->I +pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->L +pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert I->F +pipe_class fcvtI2F(regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Compare (float operands) +pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{ + single_instruction; + cr : X(write); + 
src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Compare (double operands) +pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{ + single_instruction; + cr : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Add Nop +pipe_class fadd_nop() %{ + single_instruction; + FA : R; +%} + +// Integer Store to Memory +pipe_class istore_mem_reg(memoryI mem, iRegI src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Integer Store to Memory (source operand is sp_ptr_RegP) +pipe_class istore_mem_spORreg(memoryI mem, sp_ptr_RegP src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Float Store +pipe_class fstoreF_mem_reg(memoryF mem, RegF src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Float Store of an immF0 constant (no source register read) +pipe_class fstoreF_mem_zero(memoryF mem, immF0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Double Store +pipe_class fstoreD_mem_reg(memoryD mem, RegD src) %{ + instruction_count(1); + mem : R(read); + src : C(read); + MS : R; +%} + +// Double Store of an immD0 constant (no source register read) +pipe_class fstoreD_mem_zero(memoryD mem, immD0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Integer Load (when sign bit propagation not needed) +pipe_class iload_mem(iRegI dst, memoryI mem) %{ + single_instruction; + mem : R(read); + dst : C(write); + MS : R; +%} + +// Integer Load (when sign bit propagation or masking is needed) +pipe_class iload_mask_mem(iRegI dst, memoryI mem) %{ + single_instruction; + mem : R(read); + dst : M(write); + MS : R; +%} + +// Float Load +pipe_class floadF_mem(regF dst, memoryF mem) %{ + single_instruction; + mem : R(read); + dst : M(write); + MS : R; +%} + +// Double Load +pipe_class floadD_mem(regD dst, memoryD mem) %{ + instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case + mem : R(read); + dst : M(write); + MS : R; +%} + +// Memory Nop +pipe_class mem_nop() %{ + single_instruction; + MS : R; +%} + 
+pipe_class sethi(iRegP dst, immI src) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +pipe_class loadPollP(iRegP poll) %{ + single_instruction; + poll : R(read); + MS : R; +%} + +pipe_class br(Universe br, label labl) %{ + single_instruction_with_delay_slot; + BR : R; +%} + +pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{ + single_instruction_with_delay_slot; + cr : E(read); + BR : R; +%} + +pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{ + single_instruction_with_delay_slot; + op1 : E(read); + BR : R; + MS : R; +%} + +pipe_class br_nop() %{ + single_instruction; + BR : R; +%} + +pipe_class simple_call(method meth) %{ + instruction_count(2); multiple_bundles; force_serialization; + fixed_latency(100); + BR : R(1); + MS : R(1); + A0 : R(1); +%} + +pipe_class compiled_call(method meth) %{ + instruction_count(1); multiple_bundles; force_serialization; + fixed_latency(100); + MS : R(1); +%} + +pipe_class call(method meth) %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(100); +%} + +pipe_class tail_call(Universe ignore, label labl) %{ + single_instruction; has_delay_slot; + fixed_latency(100); + BR : R(1); + MS : R(1); +%} + +pipe_class ret(Universe ignore) %{ + single_instruction; has_delay_slot; + BR : R(1); + MS : R(1); +%} + +// The real do-nothing guy +pipe_class empty( ) %{ + instruction_count(0); +%} + +pipe_class long_memory_op() %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(25); + MS : R(1); +%} + +// Check-cast +pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{ + array : R(read); + match : R(read); + IALU : R(2); + BR : R(2); + MS : R; +%} + +// Convert FPU flags into +1,0,-1 +pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{ + src1 : E(read); + src2 : E(read); + dst : E(write); + FA : R; + MS : R(2); + BR : R(2); +%} + +// Compare for p < q, and conditionally add y +pipe_class 
cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{ + p : E(read); + q : E(read); + y : E(read); + IALU : R(3) +%} + +// Perform a compare, then move conditionally in a branch delay slot. +pipe_class min_max( iRegI src2, iRegI srcdst ) %{ + src2 : E(read); + srcdst : E(read); + IALU : R; + BR : R; +%} + +// Define the class for the Nop node +define %{ + MachNop = ialu_nop; +%} + +%} + +//----------INSTRUCTIONS------------------------------------------------------- + +//------------Special Nop instructions for bundling - no match rules----------- +// Nop using the A0 functional unit +instruct Nop_A0() %{ + ins_pipe(ialu_nop_A0); +%} + +// Nop using the A1 functional unit +instruct Nop_A1( ) %{ + ins_pipe(ialu_nop_A1); +%} + +// Nop using the memory functional unit +instruct Nop_MS( ) %{ + ins_pipe(mem_nop); +%} + +// Nop using the floating add functional unit +instruct Nop_FA( ) %{ + ins_pipe(fadd_nop); +%} + +// Nop using the branch functional unit +instruct Nop_BR( ) %{ + ins_pipe(br_nop); +%} + +//----------Load/Store/Move Instructions--------------------------------------- +//----------Load Instructions-------------------------------------------------- +// Load Byte (8bit signed) +instruct loadB(iRegI dst, memoryB mem) %{ + match(Set dst (LoadB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSB $dst,$mem\t! byte -> int" %} + ins_encode %{ + // High 32 bits are harmlessly set on Aarch64 + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Byte (8bit signed) into a Long Register +instruct loadB2L(iRegL dst, memoryB mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRSB $dst,$mem\t! byte -> long" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRSB $dst.lo,$mem\t! 
byte -> long\n\t" + "ASR $dst.hi,$dst.lo,31" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); + %} +#endif + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into an int reg +instruct loadUB(iRegI dst, memoryB mem) %{ + match(Set dst (LoadUB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRB $dst,$mem\t! ubyte -> int" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into a Long Register +instruct loadUB2L(iRegL dst, memoryB mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRB $dst,$mem\t! ubyte -> long" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t" + "MOV $dst.hi,0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) with immediate mask into Long Register +instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); + +#ifdef AARCH64 + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + size(8); + format %{ "LDRB $dst,$mem\t! ubyte -> long\n\t" + "AND $dst,$dst,$mask" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); + %} +#else + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + size(12); + format %{ "LDRB $dst.lo,$mem\t! 
ubyte -> long\n\t" + "MOV $dst.hi,0\n\t" + "AND $dst.lo,$dst.lo,$mask" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Short (16bit signed) +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadS (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "LDRSH $dst,$mem+$off\t! short temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldrsh($dst$$Register, nmem); + %} + ins_pipe(iload_mask_mem); +%} +#endif + +instruct loadS(iRegI dst, memoryS mem) %{ + match(Set dst (LoadS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSH $dst,$mem\t! short" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRSB $dst,$mem\t! short -> byte" %} + ins_encode %{ + // High 32 bits are harmlessly set on Aarch64 + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16bit signed) into a Long Register +instruct loadS2L(iRegL dst, memoryS mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRSH $dst,$mem\t! 
short -> long" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t" + "ASR $dst.hi,$dst.lo,31" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); + %} +#endif + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadUSoff(iRegI dst, memoryScaledS mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadUS (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "LDRH $dst,$mem+$off\t! ushort/char temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldrh($dst$$Register, nmem); + %} + ins_pipe(iload_mem); +%} +#endif + +instruct loadUS(iRegI dst, memoryS mem) %{ + match(Set dst (LoadUS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRH $dst,$mem\t! ushort/char" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) +instruct loadUS2B(iRegI dst, memoryB mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSB $dst,$mem\t! ushort -> byte" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) into a Long Register +instruct loadUS2L(iRegL dst, memoryS mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRH $dst,$mem\t! 
short -> long" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRH $dst.lo,$mem\t! short -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register +instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRB $dst,$mem" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRB $dst.lo,$mem\t! \n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register +instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); +#ifdef AARCH64 + ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST); + + size(8); + format %{ "LDRH $dst,$mem\t! ushort/char & mask -> long\n\t" + "AND $dst,$dst,$mask" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant); + %} +#else + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + size(12); + format %{ "LDRH $dst,$mem\t! 
ushort/char & mask -> long\n\t" + "MOV $dst.hi, 0\n\t" + "AND $dst,$dst,$mask" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, $mask$$constant); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Integer + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadIoff(iRegI dst, memoryScaledI mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadI (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "ldr_s32 $dst,$mem+$off\t! int temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr_s32($dst$$Register, nmem); + %} + ins_pipe(iload_mem); +%} +#endif + +instruct loadI(iRegI dst, memoryI mem) %{ + match(Set dst (LoadI mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "ldr_s32 $dst,$mem\t! int" %} + ins_encode %{ + __ ldr_s32($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Integer to Byte (8 bit signed) +instruct loadI2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRSB $dst,$mem\t! int -> byte" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(iRegI dst, memoryB mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRB $dst,$mem\t! 
int -> ubyte" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Short (16 bit signed) +instruct loadI2S(iRegI dst, memoryS mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSH $dst,$mem\t! int -> short" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Short (16 bit UNsigned) +instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRH $dst,$mem\t! int -> ushort/char" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer into a Long Register +instruct loadI2L(iRegL dst, memoryI mem) %{ + match(Set dst (ConvI2L (LoadI mem))); +#ifdef AARCH64 + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSW $dst.lo,$mem\t! int -> long" %} + ins_encode %{ + __ ldr_s32($dst$$Register, $mem$$Address); + %} +#else + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDR $dst.lo,$mem\t! int -> long\n\t" + "ASR $dst.hi,$dst.lo,31\t! int->long" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), AsmOperand($dst$$Register, asr, 31)); + %} +#endif + ins_pipe(iload_mask_mem); +%} + +// Load Integer with mask 0xFF into a Long Register +instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); +#ifdef AARCH64 + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRB $dst.lo,$mem\t! int & 0xFF -> long" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} +#else + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRB $dst.lo,$mem\t! 
int & 0xFF -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Integer with mask 0xFFFF into a Long Register +instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 + size(4); + format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mask_mem); +%} + +#ifdef AARCH64 +// Load Integer with an immediate mask into a Long Register +instruct loadI2L_limmI(iRegL dst, memoryI mem, limmI mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST + 1*DEFAULT_COST); + + size(8); + format %{ "LDRSW $dst,$mem\t! int -> long\n\t" + "AND $dst,$dst,$mask" %} + + ins_encode %{ + __ ldr_s32($dst$$Register, $mem$$Address); + __ andr($dst$$Register, $dst$$Register, (uintx)$mask$$constant); + %} + ins_pipe(iload_mem); +%} +#else +// Load Integer with a 31-bit immediate mask into a Long Register +instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + size(12); + format %{ "LDR $dst.lo,$mem\t! int -> long\n\t" + "MOV $dst.hi, 0\n\t" + "AND $dst,$dst,$mask" %} + + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, $mask$$constant); + %} + ins_pipe(iload_mem); +%} +#endif + +#ifdef AARCH64 +// Load Integer with mask into a Long Register +// FIXME: use signedRegI mask, remove tmp? 
+instruct loadI2L_immI(iRegL dst, memoryI mem, immI mask, iRegI tmp) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + effect(TEMP dst, TEMP tmp); + + ins_cost(MEMORY_REF_COST + 3*DEFAULT_COST); + format %{ "LDRSW $mem,$dst\t! int & 31-bit mask -> long\n\t" + "MOV_SLOW $tmp,$mask\n\t" + "AND $dst,$tmp,$dst" %} + ins_encode %{ + __ ldrsw($dst$$Register, $mem$$Address); + __ mov_slow($tmp$$Register, $mask$$constant); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(iload_mem); +%} +#else +// Load Integer with a 31-bit mask into a Long Register +// FIXME: use iRegI mask, remove tmp? +instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + effect(TEMP dst, TEMP tmp); + + ins_cost(MEMORY_REF_COST + 4*DEFAULT_COST); + size(20); + format %{ "LDR $mem,$dst\t! int & 31-bit mask -> long\n\t" + "MOV $dst.hi, 0\n\t" + "MOV_SLOW $tmp,$mask\n\t" + "AND $dst,$tmp,$dst" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ mov_slow($tmp$$Register, $mask$$constant); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(iload_mem); +%} +#endif + +// Load Unsigned Integer into a Long Register +instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + ins_cost(MEMORY_REF_COST); + +#ifdef AARCH64 +//size(4); + format %{ "LDR_w $dst,$mem\t! uint -> long" %} + ins_encode %{ + __ ldr_w($dst$$Register, $mem$$Address); + %} +#else + size(8); + format %{ "LDR $dst.lo,$mem\t! 
uint -> long\n\t" + "MOV $dst.hi,0" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(iload_mem); +%} + +// Load Long + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadLoff(iRegLd dst, memoryScaledL mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadL (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "LDR $dst,$mem+$off\t! long temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr($dst$$Register, nmem); + %} + ins_pipe(iload_mem); +%} +#endif + +instruct loadL(iRegLd dst, memoryL mem ) %{ +#ifdef AARCH64 + // already atomic for Aarch64 +#else + predicate(!((LoadLNode*)n)->require_atomic_access()); +#endif + match(Set dst (LoadL mem)); + effect(TEMP dst); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "ldr_64 $dst,$mem\t! long" %} + ins_encode %{ + __ ldr_64($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +#ifndef AARCH64 +instruct loadL_2instr(iRegL dst, memorylong mem ) %{ + predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(8); + format %{ "LDR $dst.lo,$mem \t! 
long order of instrs reversed if $dst.lo == base($mem)\n\t" + "LDR $dst.hi,$mem+4 or $mem" %} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + + if ($dst$$Register == reg_to_register_object($mem$$base)) { + __ ldr($dst$$Register->successor(), Amemhi); + __ ldr($dst$$Register, Amemlo); + } else { + __ ldr($dst$$Register, Amemlo); + __ ldr($dst$$Register->successor(), Amemhi); + } + %} + ins_pipe(iload_mem); +%} + +instruct loadL_volatile(iRegL dst, indirect mem ) %{ + predicate(((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDMIA $dst,$mem\t! long" %} + ins_encode %{ + // FIXME: why is ldmia considered atomic? Should be ldrexd + RegisterSet set($dst$$Register); + set = set | reg_to_register_object($dst$$reg + 1); + __ ldmia(reg_to_register_object($mem$$base), set); + %} + ins_pipe(iload_mem); +%} + +// Load Long for nodes that require atomic access, staged through S14: +// FLDD loads S14 from $mem, then FMRRD copies it into $dst and its successor. +instruct loadL_volatile_fp(iRegL dst, memoryD mem ) %{ + predicate(((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "FLDD S14, $mem\n\t" + "FMRRD $dst, S14\t! long" %} + ins_encode %{ + __ fldd(S14, $mem$$Address); + __ fmrrd($dst$$Register, $dst$$Register->successor(), S14); + %} + ins_pipe(iload_mem); +%} + +instruct loadL_unaligned(iRegL dst, memorylong mem ) %{ + match(Set dst (LoadL_unaligned mem)); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDR $dst.lo,$mem\t! 
long order of instrs reversed if $dst.lo == base($mem)\n\t" + "LDR $dst.hi,$mem+4" %} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + + if ($dst$$Register == reg_to_register_object($mem$$base)) { + __ ldr($dst$$Register->successor(), Amemhi); + __ ldr($dst$$Register, Amemlo); + } else { + __ ldr($dst$$Register, Amemlo); + __ ldr($dst$$Register->successor(), Amemhi); + } + %} + ins_pipe(iload_mem); +%} +#endif // !AARCH64 + +// Load Range +instruct loadRange(iRegI dst, memoryI mem) %{ + match(Set dst (LoadRange mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDR_u32 $dst,$mem\t! range" %} + ins_encode %{ + __ ldr_u32($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Pointer + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadPoff(iRegP dst, memoryScaledP mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadP (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "LDR $dst,$mem+$off\t! ptr temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr($dst$$Register, nmem); + %} + ins_pipe(iload_mem); +%} +#endif + +instruct loadP(iRegP dst, memoryP mem) %{ + match(Set dst (LoadP mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDR $dst,$mem\t! 
ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +#ifdef XXX +// FIXME XXXX +//instruct loadSP(iRegP dst, memoryP mem) %{ +instruct loadSP(SPRegP dst, memoryP mem, iRegP tmp) %{ + match(Set dst (LoadP mem)); + effect(TEMP tmp); + ins_cost(MEMORY_REF_COST+1); + size(8); + + format %{ "LDR $tmp,$mem\t! ptr\n\t" + "MOV $dst,$tmp\t! ptr" %} + ins_encode %{ + __ ldr($tmp$$Register, $mem$$Address); + __ mov($dst$$Register, $tmp$$Register); + %} + ins_pipe(iload_mem); +%} +#endif + +#ifdef _LP64 +// Load Compressed Pointer + +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadNoff(iRegN dst, memoryScaledI mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadN (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "ldr_u32 $dst,$mem+$off\t! compressed ptr temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr_u32($dst$$Register, nmem); + %} + ins_pipe(iload_mem); +%} + +instruct loadN(iRegN dst, memoryI mem) %{ + match(Set dst (LoadN mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "ldr_u32 $dst,$mem\t! compressed ptr" %} + ins_encode %{ + __ ldr_u32($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} +#endif + +// Load Klass Pointer +instruct loadKlass(iRegP dst, memoryI mem) %{ + match(Set dst (LoadKlass mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDR $dst,$mem\t! klass ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +#ifdef _LP64 +// Load narrow Klass Pointer +instruct loadNKlass(iRegN dst, memoryI mem) %{ + match(Set dst (LoadNKlass mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "ldr_u32 $dst,$mem\t! 
compressed klass ptr" %} + ins_encode %{ + __ ldr_u32($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} +#endif + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadDoff(regD dst, memoryScaledD mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadD (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "ldr $dst,$mem+$off\t! double temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr_d($dst$$FloatRegister, nmem); + %} + ins_pipe(floadD_mem); +%} +#endif + +instruct loadD(regD dst, memoryD mem) %{ + match(Set dst (LoadD mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + // FIXME: needs to be atomic, but ARMv7 A.R.M. guarantees + // only LDREXD and STREXD are 64-bit single-copy atomic + format %{ "FLDD $dst,$mem" %} + ins_encode %{ + __ ldr_double($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadD_mem); +%} + +#ifndef AARCH64 +// Load Double - UNaligned +instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); + size(8); + format %{ "FLDS $dst.lo,$mem\t! misaligned double\n" + "\tFLDS $dst.hi,$mem+4\t!" 
%} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + __ flds($dst$$FloatRegister, Amemlo); + __ flds($dst$$FloatRegister->successor(), Amemhi); + %} + ins_pipe(iload_mem); +%} +#endif + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct loadFoff(regF dst, memoryScaledF mem, aimmX off, iRegP tmp) %{ + match(Set dst (LoadF (AddP mem off))); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "ldr $dst,$mem+$off\t! float temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ ldr_s($dst$$FloatRegister, nmem); + %} + ins_pipe(floadF_mem); +%} +#endif + +instruct loadF(regF dst, memoryF mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FLDS $dst,$mem" %} + ins_encode %{ + __ ldr_float($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadF_mem); +%} + +#ifdef AARCH64 +instruct load_limmI(iRegI dst, limmI src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST + 1); // + 1 because MOV is preferred + format %{ "ORR_w $dst, ZR, $src\t! 
int" %} + ins_encode %{ + __ orr_w($dst$$Register, ZR, (uintx)$src$$constant); + %} + ins_pipe(ialu_imm); +%} +#endif + +// // Load Constant +instruct loadConI( iRegI dst, immI src ) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "MOV_SLOW $dst, $src" %} + ins_encode %{ + __ mov_slow($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_hi_lo_reg); +%} + +instruct loadConIMov( iRegI dst, immIMov src ) %{ + match(Set dst src); + size(4); + format %{ "MOV $dst, $src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_imm); +%} + +#ifndef AARCH64 +instruct loadConIMovn( iRegI dst, immIRotn src ) %{ + match(Set dst src); + size(4); + format %{ "MVN $dst, ~$src" %} + ins_encode %{ + __ mvn($dst$$Register, ~$src$$constant); + %} + ins_pipe(ialu_imm_n); +%} +#endif + +instruct loadConI16( iRegI dst, immI16 src ) %{ + match(Set dst src); + size(4); +#ifdef AARCH64 + format %{ "MOVZ_w $dst, $src" %} +#else + format %{ "MOVW $dst, $src" %} +#endif + ins_encode %{ +#ifdef AARCH64 + __ mov_w($dst$$Register, $src$$constant); +#else + __ movw($dst$$Register, $src$$constant); +#endif + %} + ins_pipe(ialu_imm_n); +%} + +instruct loadConP(iRegP dst, immP src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "MOV_SLOW $dst,$src\t!ptr" %} + ins_encode %{ + relocInfo::relocType constant_reloc = _opnds[1]->constant_reloc(); + intptr_t val = $src$$constant; + if (constant_reloc == relocInfo::oop_type) { + __ mov_oop($dst$$Register, (jobject)val); + } else if (constant_reloc == relocInfo::metadata_type) { + __ mov_metadata($dst$$Register, (Metadata*)val); + } else { + __ mov_slow($dst$$Register, val); + } + %} + ins_pipe(loadConP); +%} + + +instruct loadConP_poll(iRegP dst, immP_poll src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "MOV_SLOW $dst,$src\t!ptr" %} + ins_encode %{ + __ mov_slow($dst$$Register, $src$$constant); + %} + ins_pipe(loadConP_poll); +%} + +#ifdef AARCH64 +instruct 
loadConP0(iRegP dst, immP0 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "MOV $dst,ZR\t!ptr" %} + ins_encode %{ + __ mov($dst$$Register, ZR); + %} + ins_pipe(ialu_none); +%} + +instruct loadConN(iRegN dst, immN src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "SET $dst,$src\t! compressed ptr" %} + ins_encode %{ + Register dst = $dst$$Register; + // FIXME: use $constanttablebase? + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe(ialu_hi_lo_reg); +%} + +instruct loadConN0(iRegN dst, immN0 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "MOV $dst,ZR\t! compressed ptr" %} + ins_encode %{ + __ mov($dst$$Register, ZR); + %} + ins_pipe(ialu_none); +%} + +instruct loadConNKlass(iRegN dst, immNKlass src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "SET $dst,$src\t! compressed klass ptr" %} + ins_encode %{ + Register dst = $dst$$Register; + // FIXME: use $constanttablebase? + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe(ialu_hi_lo_reg); +%} + +instruct load_limmL(iRegL dst, limmL src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "ORR $dst, ZR, $src\t! long" %} + ins_encode %{ + __ orr($dst$$Register, ZR, (uintx)$src$$constant); + %} + ins_pipe(loadConL); +%} +instruct load_immLMov(iRegL dst, immLMov src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "MOV $dst, $src\t! long" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant); + %} + ins_pipe(loadConL); +%} +instruct loadConL(iRegL dst, immL src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 4); // worst case + format %{ "mov_slow $dst, $src\t! long" %} + ins_encode %{ + // FIXME: use $constanttablebase? + __ mov_slow($dst$$Register, $src$$constant); + %} + ins_pipe(loadConL); +%} +#else +instruct loadConL(iRegL dst, immL src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 4); + format %{ "MOV_SLOW $dst.lo, $src & 0x0FFFFFFFFL \t! 
long\n\t" + "MOV_SLOW $dst.hi, $src >> 32" %} + ins_encode %{ + __ mov_slow(reg_to_register_object($dst$$reg), $src$$constant & 0x0FFFFFFFFL); + __ mov_slow(reg_to_register_object($dst$$reg + 1), ((julong)($src$$constant)) >> 32); + %} + ins_pipe(loadConL); +%} + +// Load a long constant matched by immL16: MOVW puts $src in the low register +// and a second MOVW zeroes the successor (high) register. +instruct loadConL16( iRegL dst, immL16 src ) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 2); + + size(8); + format %{ "MOVW $dst.lo, $src \n\t" + "MOVW $dst.hi, 0" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant); + __ movw($dst$$Register->successor(), 0); + %} + ins_pipe(ialu_imm); +%} +#endif + +instruct loadConF_imm8(regF dst, imm8F src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(4); + + format %{ "FCONSTS $dst, $src"%} + + ins_encode %{ + __ fconsts($dst$$FloatRegister, Assembler::float_num($src$$constant).imm8()); + %} + ins_pipe(loadConFD); // FIXME +%} + +#ifdef AARCH64 +instruct loadIConF(iRegI dst, immF src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 2); + + format %{ "MOV_SLOW $dst, $src\t! 
loadIConF" %} + + ins_encode %{ + // FIXME revisit once 6961697 is in + union { + jfloat f; + int i; + } v; + v.f = $src$$constant; + __ mov_slow($dst$$Register, v.i); + %} + ins_pipe(ialu_imm); +%} +#endif + +instruct loadConF(regF dst, immF src, iRegI tmp) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 2); + effect(TEMP tmp); + size(3*4); + + format %{ "MOV_SLOW $tmp, $src\n\t" + "FMSR $dst, $tmp"%} + + ins_encode %{ + // FIXME revisit once 6961697 is in + union { + jfloat f; + int i; + } v; + v.f = $src$$constant; + __ mov_slow($tmp$$Register, v.i); + __ fmsr($dst$$FloatRegister, $tmp$$Register); + %} + ins_pipe(loadConFD); // FIXME +%} + +instruct loadConD_imm8(regD dst, imm8D src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(4); + + format %{ "FCONSTD $dst, $src"%} + + ins_encode %{ + __ fconstd($dst$$FloatRegister, Assembler::double_num($src$$constant).imm8()); + %} + ins_pipe(loadConFD); // FIXME +%} + +instruct loadConD(regD dst, immD src, iRegP tmp) %{ + match(Set dst src); + effect(TEMP tmp); + ins_cost(MEMORY_REF_COST); + format %{ "FLDD $dst, [$constanttablebase + $constantoffset]\t! load from constant table: double=$src" %} + + ins_encode %{ + Register r = $constanttablebase; + int offset = $constantoffset($src); + if (!is_memoryD(offset)) { // can't use a predicate + // in load constant instructs + __ add_slow($tmp$$Register, r, offset); + r = $tmp$$Register; + offset = 0; + } + __ ldr_double($dst$$FloatRegister, Address(r, offset)); + %} + ins_pipe(loadConFD); +%} + +// Prefetch instructions. +// Must be safe to execute with invalid address (cannot fault). + +instruct prefetchAlloc_mp( memoryP mem ) %{ + predicate(os::is_MP()); + match( PrefetchAllocation mem ); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "PLDW $mem\t! 
Prefetch allocation" %} + ins_encode %{ +#ifdef AARCH64 + __ prfm(pstl1keep, $mem$$Address); +#else + __ pldw($mem$$Address); +#endif + %} + ins_pipe(iload_mem); +%} + +instruct prefetchAlloc_sp( memoryP mem ) %{ + predicate(!os::is_MP()); + match( PrefetchAllocation mem ); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "PLD $mem\t! Prefetch allocation" %} + ins_encode %{ +#ifdef AARCH64 + __ prfm(pstl1keep, $mem$$Address); +#else + __ pld($mem$$Address); +#endif + %} + ins_pipe(iload_mem); +%} + +//----------Store Instructions------------------------------------------------- +// Store Byte +instruct storeB(memoryB mem, store_RegI src) %{ + match(Set mem (StoreB mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRB $src,$mem\t! byte" %} + ins_encode %{ + __ strb($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeCM(memoryB mem, store_RegI src) %{ + match(Set mem (StoreCM mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRB $src,$mem\t! CMS card-mark byte" %} + ins_encode %{ + __ strb($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Char/Short + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeCoff(store_RegI src, memoryScaledS mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreC (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "STRH $src,$mem+$off\t! 
short temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ strh($src$$Register, nmem); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +instruct storeC(memoryS mem, store_RegI src) %{ + match(Set mem (StoreC mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRH $src,$mem\t! short" %} + ins_encode %{ + __ strh($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Integer + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeIoff(store_RegI src, memoryScaledI mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreI (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "str_32 $src,$mem+$off\t! int temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str_32($src$$Register, nmem); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +instruct storeI(memoryI mem, store_RegI src) %{ + match(Set mem (StoreI mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "str_32 $src,$mem" %} + ins_encode %{ + __ str_32($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Long + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeLoff(store_RegLd src, memoryScaledL mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreL (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "str_64 $src,$mem+$off\t! 
long temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str_64($src$$Register, nmem); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +// Store Long with a single str_64; outside AARCH64 this rule is restricted to +// stores that do not require atomic access. +instruct storeL(memoryL mem, store_RegLd src) %{ +#ifdef AARCH64 + // already atomic for Aarch64 +#else + predicate(!((StoreLNode*)n)->require_atomic_access()); +#endif + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "str_64 $src,$mem\t! long" %} + + ins_encode %{ + __ str_64($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +#ifndef AARCH64 +instruct storeL_2instr(memorylong mem, iRegL src) %{ + predicate(!((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(8); + format %{ "STR $src.lo,$mem\t! long\n\t" + "STR $src.hi,$mem+4" %} + + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + __ str($src$$Register, Amemlo); + __ str($src$$Register->successor(), Amemhi); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeL_volatile(indirect mem, iRegL src) %{ + predicate(((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STMIA $src,$mem\t! long" %} + ins_encode %{ + // FIXME: why is stmia considered atomic? 
Should be strexd + RegisterSet set($src$$Register); + set = set | reg_to_register_object($src$$reg + 1); + __ stmia(reg_to_register_object($mem$$base), set); + %} + ins_pipe(istore_mem_reg); +%} +#endif // !AARCH64 + +#ifndef AARCH64 +instruct storeL_volatile_fp(memoryD mem, iRegL src) %{ + predicate(((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(8); + format %{ "FMDRR S14, $src\t! long \n\t" + "FSTD S14, $mem" %} + ins_encode %{ + __ fmdrr(S14, $src$$Register, $src$$Register->successor()); + __ fstd(S14, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +#ifdef XXX +// Move SP Pointer +//instruct movSP(sp_ptr_RegP dst, SPRegP src) %{ +//instruct movSP(iRegP dst, SPRegP src) %{ +instruct movSP(store_ptr_RegP dst, SPRegP src) %{ + match(Set dst src); +//predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "MOV $dst,$src\t! SP ptr\n\t" %} + ins_encode %{ + assert(false, "XXX1 got here"); + __ mov($dst$$Register, SP); + __ mov($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} +#endif + +#ifdef AARCH64 +// FIXME +// Store SP Pointer +instruct storeSP(memoryP mem, SPRegP src, iRegP tmp) %{ + match(Set mem (StoreP mem src)); + predicate(_kids[1]->_leaf->is_Proj() && _kids[1]->_leaf->as_Proj()->_con == TypeFunc::FramePtr); + // Multiple StoreP rules, different only in register mask. + // Matcher makes the last always valid. The others will + // only be valid if they cost less than the last valid + // rule. So cost(rule1) < cost(rule2) < cost(last) + // Unlike immediates, register constraints are not checked + // at match time. + ins_cost(MEMORY_REF_COST+DEFAULT_COST+4); + effect(TEMP tmp); + size(8); + + format %{ "MOV $tmp,$src\t! SP ptr\n\t" + "STR $tmp,$mem\t! 
SP ptr" %} + ins_encode %{ + assert($src$$Register == SP, "SP expected"); + __ mov($tmp$$Register, $src$$Register); + __ str($tmp$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_spORreg); // FIXME +%} +#endif // AARCH64 + +// Store Pointer + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storePoff(store_ptr_RegP src, memoryScaledP mem, aimmX off, iRegP tmp) %{ + predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr); + match(Set mem (StoreP (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "STR $src,$mem+$off\t! ptr temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str($src$$Register, nmem); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +instruct storeP(memoryP mem, store_ptr_RegP src) %{ + match(Set mem (StoreP mem src)); +#ifdef AARCH64 + predicate(!_kids[1]->_leaf->is_Proj() || _kids[1]->_leaf->as_Proj()->_con != TypeFunc::FramePtr); +#endif + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "STR $src,$mem\t! ptr" %} + ins_encode %{ + __ str($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_spORreg); +%} + +#ifdef AARCH64 +// Store NULL Pointer +instruct storeP0(memoryP mem, immP0 src) %{ + match(Set mem (StoreP mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "STR ZR,$mem\t! 
ptr" %} + ins_encode %{ + __ str(ZR, $mem$$Address); + %} + ins_pipe(istore_mem_spORreg); +%} +#endif // AARCH64 + +#ifdef _LP64 +// Store Compressed Pointer + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeNoff(store_RegN src, memoryScaledI mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreN (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "str_32 $src,$mem+$off\t! compressed ptr temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str_32($src$$Register, nmem); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +instruct storeN(memoryI mem, store_RegN src) %{ + match(Set mem (StoreN mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "str_32 $src,$mem\t! compressed ptr" %} + ins_encode %{ + __ str_32($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +#ifdef AARCH64 +// Store NULL Pointer +instruct storeN0(memoryI mem, immN0 src) %{ + match(Set mem (StoreN mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "str_32 ZR,$mem\t! compressed ptr" %} + ins_encode %{ + __ str_32(ZR, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +// Store Compressed Klass Pointer +instruct storeNKlass(memoryI mem, store_RegN src) %{ + match(Set mem (StoreNKlass mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "str_32 $src,$mem\t! 
compressed klass ptr" %} + ins_encode %{ + __ str_32($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} +#endif + +// Store Double + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeDoff(regD src, memoryScaledD mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreD (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "STR $src,$mem+$off\t! double temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str_d($src$$FloatRegister, nmem); + %} + ins_pipe(fstoreD_mem_reg); +%} +#endif + +instruct storeD(memoryD mem, regD src) %{ + match(Set mem (StoreD mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + // FIXME: needs to be atomic, but ARMv7 A.R.M. guarantees + // only LDREXD and STREXD are 64-bit single-copy atomic + format %{ "FSTD $src,$mem" %} + ins_encode %{ + __ str_double($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +#ifdef AARCH64 +instruct movI2F(regF dst, iRegI src) %{ + match(Set dst src); + size(4); + + format %{ "FMOV_sw $dst,$src\t! movI2F" %} + ins_encode %{ + __ fmov_sw($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +instruct movF2I(iRegI dst, regF src) %{ + match(Set dst src); + size(4); + + format %{ "FMOV_ws $dst,$src\t! 
movF2I" %} + ins_encode %{ + __ fmov_ws($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif + +// Store Float + +#ifdef AARCH64 +// XXX This variant shouldn't be necessary if 6217251 is implemented +instruct storeFoff(regF src, memoryScaledF mem, aimmX off, iRegP tmp) %{ + match(Set mem (StoreF (AddP mem off) src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); // assume shift/sign-extend is free + effect(TEMP tmp); + size(4 * 2); + + format %{ "str_s $src,$mem+$off\t! float temp=$tmp" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + __ add($tmp$$Register, base, $off$$constant); + Address nmem = Address::make_raw($tmp$$reg, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + __ str_s($src$$FloatRegister, nmem); + %} + ins_pipe(fstoreF_mem_reg); +%} +#endif + +instruct storeF( memoryF mem, regF src) %{ + match(Set mem (StoreF mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "FSTS $src,$mem" %} + ins_encode %{ + __ str_float($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreF_mem_reg); +%} + +#ifdef AARCH64 +// Convert oop pointer into compressed form +instruct encodeHeapOop(iRegN dst, iRegP src, flagsReg ccr) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + effect(KILL ccr); + format %{ "encode_heap_oop $dst, $src" %} + ins_encode %{ + __ encode_heap_oop($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst, $src" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct decodeHeapOop(iRegP dst, iRegN src, flagsReg ccr) %{ + predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_oopptr()->ptr() != 
TypePtr::Constant); + match(Set dst (DecodeN src)); + effect(KILL ccr); + format %{ "decode_heap_oop $dst, $src" %} + ins_encode %{ + __ decode_heap_oop($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{ + predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst, $src" %} + ins_encode %{ + __ decode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct encodeKlass_not_null(iRegN dst, iRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_klass_not_null $dst, $src" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct decodeKlass_not_null(iRegP dst, iRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ "decode_klass_not_null $dst, $src" %} + ins_encode %{ + __ decode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} +#endif // AARCH64 + +//----------MemBar Instructions----------------------------------------------- +// Memory barrier flavors + +// TODO: take advantage of Aarch64 load-acquire, store-release, etc +// pattern-match out unnecessary membars +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-storestore" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_acquire() %{ + match(MemBarAcquire); + match(LoadFence); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-acquire" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), noreg); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_acquire_lock() %{ + 
match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + match(StoreFence); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-release" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), noreg); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_release_lock() %{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-volatile" %} + ins_encode %{ + __ membar(MacroAssembler::StoreLoad, noreg); + %} + ins_pipe(long_memory_op); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +//----------Register Move Instructions----------------------------------------- +// instruct roundDouble_nop(regD dst) %{ +// match(Set dst (RoundDouble dst)); +// ins_pipe(empty); +// %} + + +// instruct roundFloat_nop(regF dst) %{ +// match(Set dst (RoundFloat dst)); +// ins_pipe(empty); +// %} + + +#ifdef AARCH64 +// 0 constant in register +instruct zrImmI0(ZRRegI dst, immI0 imm) %{ + match(Set dst imm); + size(0); + ins_cost(0); + + format %{ "! ZR (int 0)" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} + +// 0 constant in register +instruct zrImmL0(ZRRegL dst, immL0 imm) %{ + match(Set dst imm); + size(0); + ins_cost(0); + + format %{ "! 
ZR (long 0)" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} + +#ifdef XXX +// 0 constant in register +instruct zrImmN0(ZRRegN dst, immN0 imm) %{ + match(Set dst imm); + size(0); + ins_cost(0); + + format %{ "! ZR (compressed pointer NULL)" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} + +// 0 constant in register +instruct zrImmP0(ZRRegP dst, immP0 imm) %{ + match(Set dst imm); + size(0); + ins_cost(0); + + format %{ "! ZR (NULL)" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} +#endif +#endif // AARCH64 + +// Cast Index to Pointer for unsafe natives +instruct castX2P(iRegX src, iRegP dst) %{ + match(Set dst (CastX2P src)); + + format %{ "MOV $dst,$src\t! IntX->Ptr if $dst != $src" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ mov($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg); +%} + +// Cast Pointer to Index for unsafe natives +instruct castP2X(iRegP src, iRegX dst) %{ + match(Set dst (CastP2X src)); + + format %{ "MOV $dst,$src\t! Ptr->IntX if $dst != $src" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ mov($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg); +%} + +#ifndef AARCH64 +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src\t! int" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} +#endif + +#ifdef AARCH64 +instruct cmovI_reg3(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
int" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovL_reg3(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovP_reg3(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovN_reg3(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIP_reg3(cmpOpP cmp, flagsRegP icc, iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLP_reg3(cmpOpP cmp, flagsRegP icc, iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
long" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPP_reg3(cmpOpP cmp, flagsRegP icc, iRegP dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovNP_reg3(cmpOpP cmp, flagsRegP icc, iRegN dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIU_reg3(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLU_reg3(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPU_reg3(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovNU_reg3(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! compressed ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! int" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! long" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovNZ_reg3(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegN dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "CSEL $dst,$src1,$src2,$cmp\t! 
compressed ptr" %} + ins_encode %{ + __ csel($dst$$Register, $src1$$Register, $src2$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} +#endif // AARCH64 + +#ifndef AARCH64 +instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVw$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} +#endif + +instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +#ifdef AARCH64 +instruct cmovL_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src\t! 
long" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} +#endif + +#ifndef AARCH64 +instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVw$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} +#endif + +instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +#ifndef AARCH64 +instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ 
mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + size(4); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} +#endif + +instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +#ifndef AARCH64 +instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} +#endif + +// Conditional move +instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, 
$src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); +#ifdef AARCH64 + format %{ "MOV$cmp $dst,ZR" %} +#else + format %{ "MOV$cmp $dst,$src" %} +#endif + ins_encode %{ +#ifdef AARCH64 + __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); +#else + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); +#endif + %} + ins_pipe(ialu_imm); +%} + +// This instruction also works with CmpN so we don't need cmovPN_reg. +instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! 
ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(4); +#ifdef AARCH64 + format %{ "MOV$cmp $dst,ZR\t! ptr" %} +#else + format %{ "MOV$cmp $dst,$src\t! ptr" %} +#endif + ins_encode %{ +#ifdef AARCH64 + __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); +#else + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); +#endif + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(4); +#ifdef AARCH64 + format %{ "MOV$cmp $dst,ZR\t! ptr" %} +#else + format %{ "MOV$cmp $dst,$src\t! ptr" %} +#endif + ins_encode %{ +#ifdef AARCH64 + __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); +#else + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); +#endif + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(4); +#ifdef AARCH64 + format %{ "MOV$cmp $dst,ZR\t! ptr" %} +#else + format %{ "MOV$cmp $dst,$src\t! 
ptr" %} +#endif + ins_encode %{ +#ifdef AARCH64 + __ mov($dst$$Register, ZR, (AsmCondition)($cmp$$cmpcode)); +#else + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); +#endif + %} + ins_pipe(ialu_imm); +%} + +#ifdef AARCH64 +// Conditional move +instruct cmovF_reg(cmpOp cmp, flagsReg icc, regF dst, regF src1, regF src2) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovD_reg(cmpOp cmp, flagsReg icc, regD dst, regD src1, regD src2) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFP_reg(cmpOpP cmp, flagsRegP icc, regF dst, regF src1, regF src2) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDP_reg(cmpOpP cmp, flagsRegP icc, regD dst, regD src1, regD src2) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFU_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src1, regF src2) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); 
+ ins_cost(150); + size(4); + format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDU_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src1, regD src2) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src1, regF src2) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_s $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDZ_reg(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src1, regD src2) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary src2 src1))); + ins_cost(150); + size(4); + format %{ "FCSEL_d $dst,$src1,$src2,$cmp" %} + ins_encode %{ + __ fcsel_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +#else // !AARCH64 + +// Conditional move +instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + ins_cost(150); 
+ + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +// Conditional move +instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE 
icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +// Conditional move +instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLP_immRot(cmpOpP cmp, flagsRegP pcc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! 
long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLP_imm16(cmpOpP cmp, flagsRegP pcc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! 
long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLI_immRot(cmpOp cmp, flagsReg icc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLI_immRot_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_imm16(cmpOp cmp, flagsReg icc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! 
long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ movw($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + __ movw($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} +#endif // !AARCH64 + + +//----------OS and Locking Instructions---------------------------------------- + +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(RthreadRegP dst) %{ + match(Set dst (ThreadLocal)); + + size(0); + ins_cost(0); + format %{ "! TLS is in $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} + +instruct checkCastPP( iRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + size(0); + format %{ "! 
checkcastPP of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(empty); +%} + + +instruct castPP( iRegP dst ) %{ + match(Set dst (CastPP dst)); + format %{ "! castPP of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(empty); +%} + +instruct castII( iRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "! castII of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe(empty); +%} + +//----------Arithmetic Instructions-------------------------------------------- +// Addition Instructions +// Register Addition +instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "add_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (LShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +#ifdef AARCH64 +#ifdef TODO +instruct addshlL_reg_imm_reg(iRegL dst, iRegL src1, immU6 src2, iRegL src3) %{ + match(Set dst (AddL (LShiftL src1 src2) src3)); + + size(4); + format %{ "ADD $dst,$src3,$src1<<$src2\t! long" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif +#endif + +instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (LShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1<<$src2\t! 
int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (RShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (RShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (URShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (URShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %} + ins_encode %{ + __ add_32($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Addition +instruct addI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "add_32 $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ add_32($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Pointer Register Addition +instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{ + match(Set dst (AddP src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifdef AARCH64 +// unshifted I2L operand +operand unshiftedI2L(iRegI src2) %{ +//constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(ConvI2L src2); + + op_cost(1); + format %{ "$src2.w" %} + interface(MEMORY_INTER) %{ + base($src2); + index(0xff); + scale(0x0); + disp(0x0); + %} +%} + +// shifted I2L operand +operand shiftedI2L(iRegI src2, immI_0_4 src3) %{ +//constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(LShiftX (ConvI2L src2) src3); + + op_cost(1); + format %{ "$src2.w << $src3" %} + interface(MEMORY_INTER) %{ + base($src2); + index(0xff); + scale($src3); + disp(0x0); + %} +%} + +opclass shiftedRegI(shiftedI2L, unshiftedI2L); + +instruct shlL_reg_regI(iRegL dst, iRegI src1, immU6 src2) %{ + match(Set dst (LShiftL (ConvI2L src1) src2)); + + size(4); + format %{ "LSL $dst,$src1.w,$src2\t! ptr" %} + ins_encode %{ + int c = $src2$$constant; + int r = 64 - c; + int s = 31; + if (s >= r) { + s = r - 1; + } + __ sbfm($dst$$Register, $src1$$Register, r, s); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addP_reg_regI(iRegP dst, iRegP src1, shiftedRegI src2) %{ + match(Set dst (AddP src1 src2)); + + ins_cost(DEFAULT_COST * 3/2); + size(4); + format %{ "ADD $dst,$src1,$src2, sxtw\t! 
ptr" %} + ins_encode %{ + Register base = reg_to_register_object($src2$$base); + __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// shifted iRegX operand +operand shiftedX(iRegX src2, shimmX src3) %{ +//constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(LShiftX src2 src3); + + op_cost(1); + format %{ "$src2 << $src3" %} + interface(MEMORY_INTER) %{ + base($src2); + index(0xff); + scale($src3); + disp(0x0); + %} +%} + +instruct addshlP_reg_reg_imm(iRegP dst, iRegP src1, shiftedX src2) %{ + match(Set dst (AddP src1 src2)); + + ins_cost(DEFAULT_COST * 3/2); + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + Register base = reg_to_register_object($src2$$base); + __ add($dst$$Register, $src1$$Register, AsmOperand(base, lsl, $src2$$scale)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Pointer Immediate Addition +instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{ + match(Set dst (AddP src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Long Addition +#ifdef AARCH64 +instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (AddL src1 src2)); + size(4); + format %{ "ADD $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addL_reg_regI(iRegL dst, iRegL src1, shiftedRegI src2) %{ + match(Set dst (AddL src1 src2)); + + ins_cost(DEFAULT_COST * 3/2); + size(4); + format %{ "ADD $dst,$src1,$src2, sxtw\t! 
long" %} + ins_encode %{ + Register base = reg_to_register_object($src2$$base); + __ add($dst$$Register, $src1$$Register, base, ex_sxtw, $src2$$scale); + %} + ins_pipe(ialu_reg_reg); +%} +#else +instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{ + match(Set dst (AddL src1 src2)); + effect(KILL ccr); + size(8); + format %{ "ADDS $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "ADC $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ adds($dst$$Register, $src1$$Register, $src2$$Register); + __ adc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +#ifdef AARCH64 +// Immediate Addition +instruct addL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{ + match(Set dst (AddL src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct addL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{ + match(Set dst (SubL src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,-($src2)\t! long" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, -$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// TODO +#endif + +#ifndef AARCH64 +// TODO: try immLRot2 instead; (0, $con$$constant) then becomes +// (hi($con$$constant), lo($con$$constant)) +instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{ + match(Set dst (AddL src1 con)); + effect(KILL ccr); + size(8); + format %{ "ADDS $dst.lo,$src1.lo,$con\t! long\n\t" + "ADC $dst.hi,$src1.hi,0" %} + ins_encode %{ + __ adds($dst$$Register, $src1$$Register, $con$$constant); + __ adc($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} +#endif + +//----------Conditional_store-------------------------------------------------- +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. 
+// Sets flags (EQ) on success. + +// TODO: optimize out barriers with AArch64 load-acquire/store-release +// LoadP-locked. +instruct loadPLocked(iRegP dst, memoryex mem) %{ + match(Set dst (LoadPLocked mem)); + size(4); + format %{ "LDREX $dst,$mem" %} + ins_encode %{ +#ifdef AARCH64 + Register base = reg_to_register_object($mem$$base); + __ ldxr($dst$$Register, base); +#else + __ ldrex($dst$$Register,$mem$$Address); +#endif + %} + ins_pipe(iload_mem); +%} + +instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, iRegI tmp, flagsRegP pcc ) %{ + predicate(_kids[1]->_kids[0]->_leaf->Opcode() == Op_LoadPLocked); // only works in conjunction with a LoadPLocked node + match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval))); + effect( TEMP tmp ); + size(8); + format %{ "STREX $tmp,$newval,$heap_top_ptr\n\t" + "CMP $tmp, 0" %} + ins_encode %{ +#ifdef AARCH64 + Register base = reg_to_register_object($heap_top_ptr$$base); + __ stxr($tmp$$Register, $newval$$Register, base); +#else + __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address); +#endif + __ cmp($tmp$$Register, 0); + %} + ins_pipe( long_memory_op ); +%} + +// Conditional-store of an intx value. +instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{ +#ifdef AARCH64 + match(Set icc (StoreLConditional mem (Binary oldval newval))); + effect( TEMP tmp ); + size(28); + format %{ "loop:\n\t" + "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t" + "SUBS $tmp, $tmp, $oldval\n\t" + "B.ne done\n\t" + "STXR $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop\n\t" + "CMP $tmp, 0\n\t" + "done:\n\t" + "membar LoadStore|LoadLoad" %} +#else + match(Set icc (StoreIConditional mem (Binary oldval newval))); + effect( TEMP tmp ); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t" + "XORS $tmp,$tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "CMP.eq $tmp, 1 \n\t" + "B.eq loop \n\t" + "TEQ $tmp, 0\n\t" + "membar LoadStore|LoadLoad" %} +#endif + ins_encode %{ + Label loop; + __ bind(loop); +#ifdef AARCH64 +// FIXME: use load-acquire/store-release, remove membar? + Label done; + Register base = reg_to_register_object($mem$$base); + __ ldxr($tmp$$Register, base); + __ subs($tmp$$Register, $tmp$$Register, $oldval$$Register); + __ b(done, ne); + __ stxr($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + __ cmp($tmp$$Register, 0); + __ bind(done); +#else + __ ldrex($tmp$$Register, $mem$$Address); + __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq); + __ cmp($tmp$$Register, 1, eq); + __ b(loop, eq); + __ teq($tmp$$Register, 0); +#endif + // used by biased locking only. Requires a membar. + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad), noreg); + %} + ins_pipe( long_memory_op ); +%} + +// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them + +#ifdef AARCH64 +// TODO: if combined with membar, elide membar and use +// load-acquire/store-release if appropriate +instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegL newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(24); + format %{ "loop:\n\t" + "LDXR $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "B.ne done\n\t" + "STXR $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop\n\t" + "done:\n\t" + "CSET_w $res, eq" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + Label loop, done; + __ bind(loop); + __ ldxr($tmp$$Register, base); + __ cmp($tmp$$Register, $oldval$$Register); + __ b(done, ne); + __ stxr($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + __ bind(done); + __ cset_w($res$$Register, eq); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(24); + format %{ "loop:\n\t" + "LDXR_w $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP_w $tmp, $oldval\n\t" + "B.ne done\n\t" + "STXR_w $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop\n\t" + "done:\n\t" + "CSET_w $res, eq" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + Label loop, done; + __ bind(loop); + __ ldxr_w($tmp$$Register, base); + __ cmp_w($tmp$$Register, $oldval$$Register); + __ b(done, ne); + __ stxr_w($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + __ bind(done); + __ cset_w($res$$Register, eq); + %} + ins_pipe( long_memory_op ); +%} + +// tmp must use iRegI instead of iRegN until 8051805 is fixed. +instruct compareAndSwapN_bool(memoryex mem, iRegN oldval, iRegN newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(24); + format %{ "loop:\n\t" + "LDXR_w $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP_w $tmp, $oldval\n\t" + "B.ne done\n\t" + "STXR_w $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop\n\t" + "done:\n\t" + "CSET_w $res, eq" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + Label loop, done; + __ bind(loop); + __ ldxr_w($tmp$$Register, base); + __ cmp_w($tmp$$Register, $oldval$$Register); + __ b(done, ne); + __ stxr_w($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + __ bind(done); + __ cset_w($res$$Register, eq); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(24); + format %{ "loop:\n\t" + "LDXR $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "B.ne done\n\t" + "STXR $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop\n\t" + "done:\n\t" + "CSET_w $res, eq" %} + ins_encode %{ + Register base = reg_to_register_object($mem$$base); + Label loop, done; + __ bind(loop); + __ ldxr($tmp$$Register, base); + __ cmp($tmp$$Register, $oldval$$Register); + __ b(done, ne); + __ stxr($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + __ bind(done); + __ cset_w($res$$Register, eq); + %} + ins_pipe( long_memory_op ); +%} +#else // !AARCH64 +instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(32); + format %{ "loop: \n\t" + "LDREXD $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp.lo, $oldval.lo\n\t" + "CMP.eq $tmp.hi, $oldval.hi\n\t" + "STREXD.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "XORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp$$Register, $mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ cmp($tmp$$Register->successor(), $oldval$$Register->successor(), eq); + __ strexd($tmp$$Register, $newval$$Register, $mem$$Address, eq); + __ mov($tmp$$Register, 0, ne); + __ eors($tmp$$Register, $tmp$$Register, 1, eq); + __ b(loop, eq); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} + + +instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "XORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp$$Register,$mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq); + __ mov($tmp$$Register, 0, ne); + __ eors($tmp$$Register, $tmp$$Register, 1, eq); + __ b(loop, eq); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "EORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp$$Register,$mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, eq); + __ mov($tmp$$Register, 0, ne); + __ eors($tmp$$Register, $tmp$$Register, 1, eq); + __ b(loop, eq); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} +#endif // !AARCH64 + +#ifdef AARCH64 +instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(TEMP tmp1, TEMP tmp2); + size(16); + format %{ "loop:\n\t" + "LDXR_w $tmp1, $mem\n\t" + "ADD_w $tmp1, $tmp1, $add\n\t" + "STXR_w $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr_w($tmp1$$Register, base); + __ add_w($tmp1$$Register, $tmp1$$Register, $add$$constant); + __ stxr_w($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2); + size(20); + format %{ "loop: \n\t" + "LDREX $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ add($tmp1$$Register, $tmp1$$Register, $add$$constant); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ 
cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(TEMP tmp1, TEMP tmp2); + size(16); + format %{ "loop:\n\t" + "LDXR_w $tmp1, $mem\n\t" + "ADD_w $tmp1, $tmp1, $add\n\t" + "STXR_w $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr_w($tmp1$$Register, base); + __ add_w($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ stxr_w($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2); + size(20); + format %{ "loop: \n\t" + "LDREX $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ add($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2) %{ + match(Set res (GetAndAddI mem add)); + effect(TEMP tmp1, TEMP tmp2, TEMP res); + size(16); + format %{ "loop:\n\t" + "LDXR_w $res, $mem\n\t" + "ADD_w $tmp1, $res, $add\n\t" + "STXR_w $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ 
ldxr_w($res$$Register, base); + __ add_w($tmp1$$Register, $res$$Register, $add$$constant); + __ stxr_w($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(20); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ add($tmp1$$Register, $res$$Register, $add$$constant); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2) %{ + match(Set res (GetAndAddI mem add)); + effect(TEMP tmp1, TEMP tmp2, TEMP res); + size(16); + format %{ "loop:\n\t" + "LDXR_w $res, $mem\n\t" + "ADD_w $tmp1, $res, $add\n\t" + "STXR_w $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr_w($res$$Register, base); + __ add_w($tmp1$$Register, $res$$Register, $add$$Register); + __ stxr_w($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(20); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ 
ldrex($res$$Register,$mem$$Address); + __ add($tmp1$$Register, $res$$Register, $add$$Register); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect(TEMP tmp1, TEMP tmp2); + size(16); + format %{ "loop:\n\t" + "LDXR $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STXR $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr($tmp1$$Register, base); + __ add($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ stxr($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2); + size(24); + format %{ "loop: \n\t" + "LDREXD $tmp1, $mem\n\t" + "ADDS $tmp1.lo, $tmp1.lo, $add.lo\n\t" + "ADC $tmp1.hi, $tmp1.hi, $add.hi\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp1$$Register, $mem$$Address); + __ adds($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), $add$$Register->successor()); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddL_imm_no_res(memoryex mem, aimmL add, Universe dummy, iRegL tmp1, iRegI tmp2) %{ + 
predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect(TEMP tmp1, TEMP tmp2); + size(16); + format %{ "loop:\n\t" + "LDXR $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STXR $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr($tmp1$$Register, base); + __ add($tmp1$$Register, $tmp1$$Register, $add$$constant); + __ stxr($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +// TODO: try immLRot2 instead; (0, $con$$constant) then becomes +// (hi($con$$constant), lo($con$$constant)) +instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2); + size(24); + format %{ "loop: \n\t" + "LDREXD $tmp1, $mem\n\t" + "ADDS $tmp1.lo, $tmp1.lo, $add\n\t" + "ADC $tmp1.hi, $tmp1.hi, 0\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp1$$Register, $mem$$Address); + __ adds($tmp1$$Register, $tmp1$$Register, $add$$constant); + __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), 0); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddL_reg(memoryex mem, iRegL add, iRegL res, iRegL tmp1, iRegI tmp2) %{ + match(Set res (GetAndAddL mem add)); + effect(TEMP tmp1, TEMP tmp2, TEMP res); + size(16); + format %{ "loop:\n\t" + "LDXR $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STXR $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); 
+ __ ldxr($res$$Register, base); + __ add($tmp1$$Register, $res$$Register, $add$$Register); + __ stxr($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(24); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "ADDS $tmp1.lo, $res.lo, $add.lo\n\t" + "ADC $tmp1.hi, $res.hi, $add.hi\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($res$$Register, $mem$$Address); + __ adds($tmp1$$Register, $res$$Register, $add$$Register); + __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor()); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xaddL_imm(memoryex mem, aimmL add, iRegL res, iRegL tmp1, iRegI tmp2) %{ + match(Set res (GetAndAddL mem add)); + effect(TEMP tmp1, TEMP tmp2, TEMP res); + size(16); + format %{ "loop:\n\t" + "LDXR $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STXR $tmp2, $tmp1, $mem\n\t" + "CBNZ_w $tmp2, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr($res$$Register, base); + __ add($tmp1$$Register, $res$$Register, $add$$constant); + __ stxr($tmp2$$Register, $tmp1$$Register, base); + __ cbnz_w($tmp2$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +// TODO: try immLRot2 instead; (0, $con$$constant) then becomes +// (hi($con$$constant), lo($con$$constant)) +instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP 
tmp2, TEMP res); + size(24); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "ADDS $tmp1.lo, $res.lo, $add\n\t" + "ADC $tmp1.hi, $res.hi, 0\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($res$$Register, $mem$$Address); + __ adds($tmp1$$Register, $res$$Register, $add$$constant); + __ adc($tmp1$$Register->successor(), $res$$Register->successor(), 0); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp) %{ + match(Set res (GetAndSetI mem newval)); + effect(TEMP tmp, TEMP res); + size(12); + format %{ "loop:\n\t" + "LDXR_w $res, $mem\n\t" + "STXR_w $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr_w($res$$Register, base); + __ stxr_w($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} + +#ifdef XXX +// Disabled until 8051805 is fixed. 
+instruct xchgN(memoryex mem, iRegN newval, iRegN res, iRegN tmp) %{ + match(Set res (GetAndSetN mem newval)); + effect(TEMP tmp, TEMP res); + size(12); + format %{ "loop:\n\t" + "LDXR_w $res, $mem\n\t" + "STXR_w $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr_w($res$$Register, base); + __ stxr_w($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#endif +#else +instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetI mem newval)); + effect(KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "STREX $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif + +#ifdef AARCH64 +instruct xchgL(memoryex mem, iRegL newval, iRegL res, iRegI tmp) %{ + match(Set res (GetAndSetL mem newval)); + effect(TEMP tmp, TEMP res); + size(12); + format %{ "loop:\n\t" + "LDXR $res, $mem\n\t" + "STXR $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldxr($res$$Register, base); + __ stxr($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetL mem newval)); + effect( KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "STREXD $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); 
+ __ ldrexd($res$$Register, $mem$$Address); + __ strexd($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif // !AARCH64 + +#ifdef AARCH64 +instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp) %{ + match(Set res (GetAndSetP mem newval)); + effect(TEMP tmp, TEMP res); + size(12); + format %{ "loop:\n\t" + "LDREX $res, $mem\n\t" + "STREX $tmp, $newval, $mem\n\t" + "CBNZ_w $tmp, loop" %} + + ins_encode %{ + Label loop; + Register base = reg_to_register_object($mem$$base); + __ bind(loop); + __ ldrex($res$$Register, base); + __ strex($tmp$$Register, $newval$$Register, base); + __ cbnz_w($tmp$$Register, loop); + %} + ins_pipe( long_memory_op ); +%} +#else +instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetP mem newval)); + effect(KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "STREX $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, ne); + %} + ins_pipe( long_memory_op ); +%} +#endif // !AARCH64 + +//--------------------- +// Subtraction Instructions +// Register Subtraction +instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ "sub_32 $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (LShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (LShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2<<$src3\t! int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (RShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (RShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2>>$src3\t! 
int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (URShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (URShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2>>>$src3\t! int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (LShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1<<$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI (LShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1<<$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (RShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI 
(RShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, asr, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (URShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI (URShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Immediate Subtraction +instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ "sub_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "sub_32 $dst,$src1,-($src2)\t! int" %} + ins_encode %{ + __ sub_32($dst$$Register, $src1$$Register, -$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +#ifndef AARCH64 +// Immediate minus register (dst = src1 - src2), emitted as a reverse subtract +instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ "RSB $dst,$src2,$src1" %} + ins_encode %{ + __ rsb($dst$$Register, $src2$$Register, $src1$$constant); + %} + ins_pipe(ialu_zero_reg); +%} +#endif + +// Register Subtraction +#ifdef AARCH64 +instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (SubL src1 src2)); + + size(4); + format %{ "SUB $dst,$src1,$src2\t! 
long" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} +#else +instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{ + match(Set dst (SubL src1 src2)); + effect (KILL icc); + + size(8); + format %{ "SUBS $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "SBC $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ subs($dst$$Register, $src1$$Register, $src2$$Register); + __ sbc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +#ifdef AARCH64 +// Immediate Subtraction +instruct subL_reg_aimm(iRegL dst, iRegL src1, aimmL src2) %{ + match(Set dst (SubL src1 src2)); + + size(4); + format %{ "SUB $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct subL_reg_immLneg(iRegL dst, iRegL src1, aimmLneg src2) %{ + match(Set dst (AddL src1 src2)); + + size(4); + format %{ "SUB $dst,$src1,-($src2)\t! long" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, -$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// TODO +#endif + +#ifndef AARCH64 +// Immediate Subtraction +// TODO: try immLRot2 instead, so that (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) +instruct subL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg icc) %{ + match(Set dst (SubL src1 con)); + effect (KILL icc); + + size(8); + format %{ "SUBS $dst.lo,$src1.lo,$con\t! long\n\t" + "SBC $dst.hi,$src1.hi,0" %} + ins_encode %{ + __ subs($dst$$Register, $src1$$Register, $con$$constant); + __ sbc($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} + +// Long negation +instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{ + match(Set dst (SubL zero src2)); + effect (KILL icc); + + size(8); + format %{ "RSBS $dst.lo,$src2.lo,0\t! 
long\n\t" + "RSC $dst.hi,$src2.hi,0" %} + ins_encode %{ + __ rsbs($dst$$Register, $src2$$Register, 0); + __ rsc($dst$$Register->successor(), $src2$$Register->successor(), 0); + %} + ins_pipe(ialu_zero_reg); +%} +#endif // !AARCH64 + +// Multiplication Instructions +// Integer Multiplication +// Register Multiplication +instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (MulI src1 src2)); + + size(4); + format %{ "mul_32 $dst,$src1,$src2" %} + ins_encode %{ + __ mul_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} + +#ifdef AARCH64 +instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + size(4); + format %{ "MUL $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ mul($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} +#else +instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "MUL $dst.hi,$src1.lo,$src2.hi\t! long" %} + ins_encode %{ + __ mul($dst$$Register->successor(), $src1$$Register, $src2$$Register->successor()); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_hi1_lo2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(8); + format %{ "MLA $dst.hi,$src1.hi,$src2.lo,$dst.hi\t! long\n\t" + "MOV $dst.lo, 0"%} + ins_encode %{ + __ mla($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register, $dst$$Register->successor()); + __ mov($dst$$Register, 0); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_lo1_lo2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "UMLAL $dst.lo,$dst.hi,$src1,$src2\t! 
long" %} + ins_encode %{ + __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + + expand %{ + mulL_lo1_hi2(dst, src1, src2); + mulL_hi1_lo2(dst, src1, src2); + mulL_lo1_lo2(dst, src1, src2); + %} +%} +#endif // !AARCH64 + +// Integer Division +// Register Division +#ifdef AARCH64 +instruct divI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (DivI src1 src2)); + + size(4); + format %{ "SDIV $dst,$src1,$src2\t! 32-bit" %} + ins_encode %{ + __ sdiv_w($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} +#else +instruct divI_reg_reg(R1RegI dst, R0RegI src1, R2RegI src2, LRRegP lr, flagsReg ccr) %{ + match(Set dst (DivI src1 src2)); + effect( KILL ccr, KILL src1, KILL src2, KILL lr); + ins_cost((2+71)*DEFAULT_COST); + + format %{ "DIV $dst,$src1,$src2 ! call to StubRoutines::Arm::idiv_irem_entry()" %} + ins_encode %{ + __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type); + %} + ins_pipe(sdiv_reg_reg); +%} +#endif + +// Register Long Division +#ifdef AARCH64 +instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (DivL src1 src2)); + + size(4); + format %{ "SDIV $dst,$src1,$src2" %} + ins_encode %{ + __ sdiv($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} +#else +instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ + match(Set dst (DivL src1 src2)); + effect(CALL); + ins_cost(DEFAULT_COST*71); + format %{ "DIVL $src1,$src2,$dst\t! long ! 
call to SharedRuntime::ldiv" %} + ins_encode %{ + address target = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(divL_reg_reg); +%} +#endif + +// Integer Remainder +// Register Remainder +#ifdef AARCH64 +#ifdef TODO +instruct msubI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (MulI src2 src3))); + + size(4); + format %{ "MSUB $dst,$src2,$src3,$src1\t! 32-bit\n\t" %} + ins_encode %{ + __ msub_w($dst$$Register, $src2$$Register, $src3$$Register, $src1$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} +#endif + +instruct modI_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{ + match(Set dst (ModI src1 src2)); + effect(TEMP temp); + + size(8); + format %{ "SDIV $temp,$src1,$src2\t! 32-bit\n\t" + "MSUB $dst,$src2,$temp,$src1\t! 32-bit\n\t" %} + ins_encode %{ + __ sdiv_w($temp$$Register, $src1$$Register, $src2$$Register); + __ msub_w($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} +#else +instruct modI_reg_reg(R0RegI dst, R0RegI src1, R2RegI src2, R1RegI temp, LRRegP lr, flagsReg ccr ) %{ + match(Set dst (ModI src1 src2)); + effect( KILL ccr, KILL temp, KILL src2, KILL lr); + + format %{ "MODI $dst,$src1,$src2\t ! 
call to StubRoutines::Arm::idiv_irem_entry" %} + ins_encode %{ + __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type); + %} + ins_pipe(sdiv_reg_reg); +%} +#endif + +// Register Long Remainder +#ifdef AARCH64 +instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2, iRegL temp) %{ + match(Set dst (ModL src1 src2)); + effect(TEMP temp); + + size(8); + format %{ "SDIV $temp,$src1,$src2\n\t" + "MSUB $dst,$src2,$temp,$src1" %} + ins_encode %{ + __ sdiv($temp$$Register, $src1$$Register, $src2$$Register); + __ msub($dst$$Register, $src2$$Register, $temp$$Register, $src1$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} +#else +instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ + match(Set dst (ModL src1 src2)); + effect(CALL); + ins_cost(MEMORY_REF_COST); // FIXME + format %{ "modL $dst,$src1,$src2\t ! call to SharedRuntime::lrem" %} + ins_encode %{ + address target = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(divL_reg_reg); +%} +#endif + +// Integer Shift Instructions + +// Register Shift Left +instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); +#ifdef AARCH64 + format %{ "LSLV $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ lslv_w($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + format %{ "LSL $dst,$src1,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register)); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Left Immediate +instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); +#ifdef AARCH64 + format %{ "LSL_w $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ _lsl($dst$$Register, $src1$$Register, $src2$$constant); + %} +#else + format %{ "LSL $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant); + %} +#endif + ins_pipe(ialu_reg_imm); +%} + +#ifndef AARCH64 +instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{"OR $dst.hi,$dst.hi,($src1.hi << $src2)" %} + ins_encode %{ + __ orr($dst$$Register->successor(), $dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsl, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shlL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "LSL $dst.lo,$src1.lo,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst.hi,$src2,32 \n\t" + "LSLpl $dst.hi,$src1.lo,$dst.hi \n\t" + "RSBmi $dst.hi,$dst.hi,0 \n\t" + "LSRmi $dst.hi,$src1.lo,$dst.hi" %} + + ins_encode %{ + // $src1$$Register and $dst$$Register->successor() can't be the same + __ subs($dst$$Register->successor(), $src2$$Register, 32); + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register, lsl, $dst$$Register->successor()), pl); + __ rsb($dst$$Register->successor(), $dst$$Register->successor(), 0, mi); + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register, lsr, $dst$$Register->successor()), mi); + %} + ins_pipe(ialu_reg_reg); +%} +#endif // !AARCH64 + +instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (LShiftL src1 src2)); + +#ifdef AARCH64 + size(4); + format %{ "LSLV $dst,$src1,$src2\t! 
long" %} + ins_encode %{ + __ lslv($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +#else + expand %{ + flagsReg ccr; + shlL_reg_reg_overlap(dst, src1, src2, ccr); + shlL_reg_reg_merge_hi(dst, src1, src2); + shlL_reg_reg_merge_lo(dst, src1, src2); + %} +#endif +%} + +#ifdef AARCH64 +instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(4); + format %{ "LSL $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ logical_shift_left($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// Register Shift Left Immediate +instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(8); + format %{ "LSL $dst.hi,$src1.lo,$src2-32\t! or mov if $src2==32\n\t" + "MOV $dst.lo, 0" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register->successor(), $src1$$Register); + } else { + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register, lsl, $src2$$constant-32)); + } + __ mov($dst$$Register, 0); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(12); + format %{ "LSL $dst.hi,$src1.hi,$src2\n\t" + "OR $dst.hi, $dst.hi, $src1.lo >> 32-$src2\n\t" + "LSL $dst.lo,$src1.lo,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.lo and + // dst.hi can't overlap but src1.hi and dst.hi can. 
+ __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsl, $src2$$constant)); + __ orr($dst$$Register->successor(), $dst$$Register->successor(), AsmOperand($src1$$Register, lsr, 32-$src2$$constant)); + __ mov($dst$$Register, AsmOperand($src1$$Register, lsl, $src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} +#endif // !AARCH64 + +// Register Arithmetic Shift Right +instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (RShiftI src1 src2)); + size(4); +#ifdef AARCH64 + format %{ "ASRV $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ asrv_w($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + format %{ "ASR $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$Register)); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// Register Arithmetic Shift Right Immediate +instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (RShiftI src1 src2)); + + size(4); +#ifdef AARCH64 + format %{ "ASR_w $dst,$src1,$src2" %} + ins_encode %{ + __ _asr_w($dst$$Register, $src1$$Register, $src2$$constant); + %} +#else + format %{ "ASR $dst,$src1,$src2" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, asr, $src2$$constant)); + %} +#endif + ins_pipe(ialu_reg_imm); +%} + +#ifndef AARCH64 +// Register Shift Right Arithmetic Long +instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "OR $dst.lo,$dst.lo,($src1.lo >> $src2)" %} + ins_encode %{ + __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct sarL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "ASR $dst.hi,$src1.hi,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, $src2$$Register)); 
+ %} + ins_pipe(ialu_reg_reg); +%} + +instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst.lo,$src2,32 \n\t" + "ASRpl $dst.lo,$src1.hi,$dst.lo \n\t" + "RSBmi $dst.lo,$dst.lo,0 \n\t" + "LSLmi $dst.lo,$src1.hi,$dst.lo" %} + + ins_encode %{ + // $src1$$Register->successor() and $dst$$Register can't be the same + __ subs($dst$$Register, $src2$$Register, 32); + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), asr, $dst$$Register), pl); + __ rsb($dst$$Register, $dst$$Register, 0, mi); + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsl, $dst$$Register), mi); + %} + ins_pipe(ialu_reg_reg); +%} +#endif // !AARCH64 + +instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (RShiftL src1 src2)); + +#ifdef AARCH64 + size(4); + format %{ "ASRV $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ asrv($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +#else + expand %{ + flagsReg ccr; + sarL_reg_reg_overlap(dst, src1, src2, ccr); + sarL_reg_reg_merge_lo(dst, src1, src2); + sarL_reg_reg_merge_hi(dst, src1, src2); + %} +#endif +%} + +// Register Arithmetic Shift Right Immediate +#ifdef AARCH64 +instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (RShiftL src1 src2)); + + size(4); + format %{ "ASR $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ _asr($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (RShiftL src1 src2)); + + size(8); + format %{ "ASR $dst.lo,$src1.hi,$src2-32\t! 
or mov if $src2==32\n\t" + "ASR $dst.hi,$src1.hi, $src2" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register, $src1$$Register->successor()); + } else{ + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), asr, $src2$$constant-32)); + } + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, 0)); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (RShiftL src1 src2)); + size(12); + format %{ "LSR $dst.lo,$src1.lo,$src2\n\t" + "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t" + "ASR $dst.hi,$src1.hi,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.hi and + // dst.lo can't overlap but src1.lo and dst.lo can. + __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); + __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register->successor(), lsl, 32-$src2$$constant)); + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), asr, $src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} +#endif + +// Register Shift Right +instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (URShiftI src1 src2)); + size(4); +#ifdef AARCH64 + format %{ "LSRV $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ lsrv_w($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + format %{ "LSR $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Right Immediate +instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (URShiftI src1 src2)); + + size(4); +#ifdef AARCH64 + format %{ "LSR_w $dst,$src1,$src2" %} + ins_encode %{ + __ _lsr_w($dst$$Register, $src1$$Register, $src2$$constant); + %} +#else + format %{ "LSR $dst,$src1,$src2" %} + ins_encode %{ + __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); + %} +#endif + ins_pipe(ialu_reg_imm); +%} + +#ifndef AARCH64 +// Register Shift Right Long (logical); these helpers only write the low or high half of dst +instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "OR $dst.lo,$dst.lo,($src1.lo >>> $src2)" %} + ins_encode %{ + __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shrL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "LSR $dst.hi,$src1.hi,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsr, $src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst.lo,$src2,32 \n\t" + "LSRpl $dst.lo,$src1.hi,$dst.lo \n\t" + "RSBmi $dst.lo,$dst.lo,0 \n\t" + "LSLmi $dst.lo,$src1.hi,$dst.lo" %} + + ins_encode %{ + // $src1$$Register->successor() and $dst$$Register can't be the same + __ subs($dst$$Register, $src2$$Register, 32); + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsr, $dst$$Register), pl); + __ rsb($dst$$Register, $dst$$Register, 0, mi); + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsl, $dst$$Register), mi); + %} + ins_pipe(ialu_reg_reg); +%} 
+#endif // !AARCH64 + +instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (URShiftL src1 src2)); + +#ifdef AARCH64 + size(4); + format %{ "LSRV $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ lsrv($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +#else + expand %{ + flagsReg ccr; + shrL_reg_reg_overlap(dst, src1, src2, ccr); + shrL_reg_reg_merge_lo(dst, src1, src2); + shrL_reg_reg_merge_hi(dst, src1, src2); + %} +#endif +%} + +// Register Shift Right Immediate +#ifdef AARCH64 +instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(4); + format %{ "LSR $dst,$src1,$src2" %} + ins_encode %{ + __ _lsr($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(8); + format %{ "LSR $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register, $src1$$Register->successor()); + } else { + __ mov($dst$$Register, AsmOperand($src1$$Register->successor(), lsr, $src2$$constant-32)); + } + __ mov($dst$$Register->successor(), 0); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(12); + format %{ "LSR $dst.lo,$src1.lo,$src2\n\t" + "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t" + "LSR $dst.hi,$src1.hi,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.hi and + // dst.lo can't overlap but src1.lo and dst.lo can. 
+ __ mov($dst$$Register, AsmOperand($src1$$Register, lsr, $src2$$constant)); + __ orr($dst$$Register, $dst$$Register, AsmOperand($src1$$Register->successor(), lsl, 32-$src2$$constant)); + __ mov($dst$$Register->successor(), AsmOperand($src1$$Register->successor(), lsr, $src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} +#endif // !AARCH64 + + +instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{ + match(Set dst (URShiftI (CastP2X src1) src2)); + size(4); + format %{ "LSR $dst,$src1,$src2\t! Cast ptr $src1 to int and shift" %} + ins_encode %{ + __ logical_shift_right($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +//----------Floating Point Arithmetic Instructions----------------------------- + +// Add float single precision +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + + size(4); + format %{ "FADDS $dst,$src1,$src2" %} + ins_encode %{ + __ add_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(faddF_reg_reg); +%} + +// Add float double precision +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + + size(4); + format %{ "FADDD $dst,$src1,$src2" %} + ins_encode %{ + __ add_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(faddD_reg_reg); +%} + +// Sub float single precision +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + + size(4); + format %{ "FSUBS $dst,$src1,$src2" %} + ins_encode %{ + __ sub_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddF_reg_reg); +%} + +// Sub float double precision +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + + size(4); + format %{ "FSUBD $dst,$src1,$src2" %} + ins_encode %{ + __ sub_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + 
ins_pipe(faddD_reg_reg); +%} + +// Mul float single precision +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + + size(4); + format %{ "FMULS $dst,$src1,$src2" %} + ins_encode %{ + __ mul_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fmulF_reg_reg); +%} + +// Mul float double precision +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + + size(4); + format %{ "FMULD $dst,$src1,$src2" %} + ins_encode %{ + __ mul_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fmulD_reg_reg); +%} + +// Div float single precision +instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + size(4); + format %{ "FDIVS $dst,$src1,$src2" %} + ins_encode %{ + __ div_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fdivF_reg_reg); +%} + +// Div float double precision +instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + size(4); + format %{ "FDIVD $dst,$src1,$src2" %} + ins_encode %{ + __ div_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fdivD_reg_reg); +%} + +// Absolute float double precision +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + + size(4); + format %{ "FABSd $dst,$src" %} + ins_encode %{ + __ abs_double($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddD_reg); +%} + +// Absolute float single precision +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + format %{ "FABSs $dst,$src" %} + ins_encode %{ + __ abs_float($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddF_reg); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + + size(4); + format %{ "FNEGs $dst,$src" %} + ins_encode %{ + __ neg_float($dst$$FloatRegister, $src$$FloatRegister); + %} + 
ins_pipe(faddF_reg); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + + format %{ "FNEGd $dst,$src" %} + ins_encode %{ + __ neg_double($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddD_reg); +%} + +// Sqrt float single precision +instruct sqrtF_reg_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + size(4); + format %{ "FSQRTS $dst,$src" %} + ins_encode %{ + __ sqrt_float($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fdivF_reg_reg); +%} + +// Sqrt float double precision +instruct sqrtD_reg_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + + size(4); + format %{ "FSQRTD $dst,$src" %} + ins_encode %{ + __ sqrt_double($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fdivD_reg_reg); +%} + +//----------Logical Instructions----------------------------------------------- +// And Instructions +// Register And +instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "and_32 $dst,$src1,$src2" %} + ins_encode %{ + __ and_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AndI src1 (LShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (LShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +#ifndef AARCH64 +instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set 
dst (AndI src1 (RShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// And with an arithmetically right-shifted (asr) second operand, immU5 shift amount. +instruct andsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (RShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// And with a logically right-shifted (lsr) second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AndI src1 (URShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// And with a logically right-shifted (lsr) second operand, immU5 shift amount. +instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (URShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ and_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate And +instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "and_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ and_32($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Immediate And with a limmIn constant, emitted as BIC with the bitwise +// complement of the constant. Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "bic $dst,$src1,~$src2\t! 
int" %} + ins_encode %{ + __ bic($dst$$Register, $src1$$Register, ~$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#endif + +// Register And Long +// On the non-AARCH64 path the long is processed as two 32-bit halves: +// $dst$$Register first, then its successor(); the format prints both. +instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (AndL src1 src2)); + + ins_cost(DEFAULT_COST); +#ifdef AARCH64 + size(4); + format %{ "AND $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + size(8); + format %{ "AND $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "AND $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register); + __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +#ifdef AARCH64 +// Immediate And +instruct andL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{ + match(Set dst (AndL src1 src2)); + + size(4); + format %{ "AND $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +// And long with an immLlowRot constant: the first half is ANDed with $con, +// the successor() half with 0. +instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (AndL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "AND $dst.lo,$src1.lo,$con\t! long\n\t" + "AND $dst.hi,$src1.hi,0" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $con$$constant); + __ andr($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} +#endif + +// Or Instructions +// Register Or +instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "orr_32 $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ orr_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +// Or with a left-shifted second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (LShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Or with a left-shifted second operand, immU5 shift amount. +instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (LShiftI src2 src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Or with an arithmetically right-shifted (asr) second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (RShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Or with an arithmetically right-shifted (asr) second operand, immU5 shift amount. +instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (RShiftI src2 src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Or with a logically right-shifted (lsr) second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (URShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Or with a logically right-shifted (lsr) second operand, immU5 shift amount. +instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (URShiftI src2 
src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ orr_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Or +instruct orI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "orr_32 $dst,$src1,$src2" %} + ins_encode %{ + __ orr_32($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +// TODO: orn_32 with limmIn + +// Register Or Long +// On the non-AARCH64 path the long is ORed as two 32-bit halves: +// $dst$$Register (printed as .lo) first, then its successor() (printed as .hi). +instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (OrL src1 src2)); + + ins_cost(DEFAULT_COST); +#ifdef AARCH64 + size(4); + format %{ "OR $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + size(8); + format %{ "OR $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "OR $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +#ifdef AARCH64 +// Immediate Or Long (limmL constant, single ORR) +instruct orL_reg_limm(iRegL dst, iRegL src1, limmL src2) %{ + match(Set dst (OrL src1 src2)); + + size(4); + format %{ "ORR $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, (uintx)$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +// Or long with an immLlowRot constant, emitted as two 32-bit ORs (size 8). +instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (OrL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "OR $dst.lo,$src1.lo,$con\t! 
long\n\t" + "OR $dst.hi,$src1.hi,0" %} + ins_encode %{ + // The low half is ORed with the constant; the successor() half is ORed with 0. + __ orr($dst$$Register, $src1$$Register, $con$$constant); + __ orr($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} +#endif + +// Disabled rule: compiled only if TODO is defined. +#ifdef TODO +// Use SPRegP to match Rthread (TLS register) without spilling. +// Use store_ptr_RegP to match Rthread (TLS register) without spilling. +// Use sp_ptr_RegP to match Rthread (TLS register) without spilling. +instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + size(4); + format %{ "OR $dst,$src1,$src2" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Xor Instructions +// Register Xor +instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "eor_32 $dst,$src1,$src2" %} + ins_encode %{ + __ eor_32($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +// Xor with a left-shifted second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (LShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Xor with a left-shifted second operand, immU5 shift amount. +instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (LShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsl, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Xor with an arithmetically right-shifted (asr) second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (RShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, 
$src1$$Register, AsmOperand($src2$$Register, asr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Xor with an arithmetically right-shifted (asr) second operand, immU5 shift amount. +instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (RShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, asr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Xor with a logically right-shifted (lsr) second operand, shift amount in a register. +// Compiled only when AARCH64 is not defined. +#ifndef AARCH64 +instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (URShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Xor with a logically right-shifted (lsr) second operand, immU5 shift amount. +instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (URShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ eor_32($dst$$Register, $src1$$Register, AsmOperand($src2$$Register, lsr, $src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Xor +instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "eor_32 $dst,$src1,$src2" %} + ins_encode %{ + __ eor_32($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Xor Long +// On the non-AARCH64 path the format lists the two halves in emission order: +// $dst$$Register (.lo) first, then its successor() (.hi). +instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(DEFAULT_COST); +#ifdef AARCH64 + size(4); + format %{ "XOR $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register); + %} +#else + size(8); + format %{ "XOR $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "XOR $dst.hi,$src1.hi,$src2.hi\t! 
long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register); + __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +#ifdef AARCH64 +// Immediate Xor Long (limmL constant, single EOR) +instruct xorL_reg_limmL(iRegL dst, iRegL src1, limmL con) %{ + match(Set dst (XorL src1 con)); + ins_cost(DEFAULT_COST); + size(4); + format %{ "EOR $dst,$src1,$con\t! long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, (uintx)$con$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +#else +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +// Xor long with an immLlowRot constant: the first half is XORed with $con, +// the successor() half with 0; the format lists them in that order. +instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (XorL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "XOR $dst.lo,$src1.lo,$con\t! long\n\t" + "XOR $dst.hi,$src1.hi,0\t! long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $con$$constant); + __ eor($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} +#endif // !AARCH64 + +//----------Convert to Boolean------------------------------------------------- +// Convert int to boolean: $dst is set to 1 when $src is non-zero, else 0. +// The flags register is clobbered (KILL ccr). +instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{ + match(Set dst (Conv2B src)); + effect(KILL ccr); +#ifdef AARCH64 + size(8); + ins_cost(DEFAULT_COST*2); + format %{ "cmp_32 $src,ZR\n\t" + "cset_w $dst, ne" %} + ins_encode %{ + __ cmp_32($src$$Register, ZR); + __ cset_w($dst$$Register, ne); + %} +#else + size(12); + ins_cost(DEFAULT_COST*2); + format %{ "TST $src,$src \n\t" + "MOV $dst, 0 \n\t" + "MOV.ne $dst, 1" %} + ins_encode %{ // FIXME: can do better? 
+ __ tst($src$$Register, $src$$Register); + __ mov($dst$$Register, 0); + __ mov($dst$$Register, 1, ne); + %} +#endif + ins_pipe(ialu_reg_ialu); +%} + +// Convert pointer to boolean: $dst is set to 1 when $src is non-zero, else 0. +// The flags register is clobbered (KILL ccr). +instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{ + match(Set dst (Conv2B src)); + effect(KILL ccr); +#ifdef AARCH64 + size(8); + ins_cost(DEFAULT_COST*2); + format %{ "CMP $src,ZR\n\t" + "cset $dst, ne" %} + ins_encode %{ + __ cmp($src$$Register, ZR); + __ cset($dst$$Register, ne); + %} +#else + size(12); + ins_cost(DEFAULT_COST*2); + format %{ "TST $src,$src \n\t" + "MOV $dst, 0 \n\t" + "MOV.ne $dst, 1" %} + ins_encode %{ + __ tst($src$$Register, $src$$Register); + __ mov($dst$$Register, 0); + __ mov($dst$$Register, 1, ne); + %} +#endif + ins_pipe(ialu_reg_ialu); +%} + +// CmpLTMask: compare $p with $q and produce a mask in $dst under the lt condition. +// The non-AARCH64 path emits MVN $dst,0 under lt, which the format prints as "MOV.lt $dst, #-1". +instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{ + match(Set dst (CmpLTMask p q)); + effect( KILL ccr ); +#ifdef AARCH64 + size(8); + ins_cost(DEFAULT_COST*2); + format %{ "CMP_w $p,$q\n\t" + "CSETM_w $dst, lt" %} + ins_encode %{ + __ cmp_w($p$$Register, $q$$Register); + __ csetm_w($dst$$Register, lt); + %} +#else + ins_cost(DEFAULT_COST*3); + format %{ "CMP $p,$q\n\t" + "MOV $dst, #0\n\t" + "MOV.lt $dst, #-1" %} + ins_encode %{ + __ cmp($p$$Register, $q$$Register); + __ mov($dst$$Register, 0); + __ mvn($dst$$Register, 0, lt); + %} +#endif + ins_pipe(ialu_reg_reg_ialu); +%} + +// Same as cmpLTMask_reg_reg, with $q an aimmI constant. +instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{ + match(Set dst (CmpLTMask p q)); + effect( KILL ccr ); +#ifdef AARCH64 + size(8); + ins_cost(DEFAULT_COST*2); + format %{ "CMP_w $p,$q\n\t" + "CSETM_w $dst, lt" %} + ins_encode %{ + __ cmp_w($p$$Register, $q$$constant); + __ csetm_w($dst$$Register, lt); + %} +#else + ins_cost(DEFAULT_COST*3); + format %{ "CMP $p,$q\n\t" + "MOV $dst, #0\n\t" + "MOV.lt $dst, #-1" %} + ins_encode %{ + __ cmp($p$$Register, $q$$constant); + __ mov($dst$$Register, 0); + __ mvn($dst$$Register, 0, lt); + %} +#endif + ins_pipe(ialu_reg_reg_ialu); +%} + +// Conditional add: matches ((CmpLTMask p q) & y) plus a third value. +#ifdef AARCH64 +instruct cadd_cmpLTMask3( 
iRegI dst, iRegI p, iRegI q, iRegI y, iRegI x, flagsReg ccr ) %{ + match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); + effect( TEMP dst, KILL ccr ); + size(12); + ins_cost(DEFAULT_COST*3); + format %{ "CMP_w $p,$q\n\t" + "ADD_w $dst,$y,$x\n\t" + "CSEL_w $dst,$dst,$x,lt" %} + ins_encode %{ + __ cmp_w($p$$Register, $q$$Register); + __ add_w($dst$$Register, $y$$Register, $x$$Register); + __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); +%} +#else +// Non-AARCH64 form: the result is accumulated in place in $z, +// using CMP followed by an ADD executed under the lt condition. +instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{ + match(Set z (AddI (AndI (CmpLTMask p q) y) z)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "CMP $p,$q\n\t" + "ADD.lt $z,$y,$z" %} + ins_encode %{ + __ cmp($p$$Register, $q$$Register); + __ add($z$$Register, $y$$Register, $z$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); +%} +#endif + +// As cadd_cmpLTMask3, with $q an aimmI constant. +#ifdef AARCH64 +instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI x, flagsReg ccr ) %{ + match(Set dst (AddI (AndI (CmpLTMask p q) y) x)); + effect( TEMP dst, KILL ccr ); + size(12); + ins_cost(DEFAULT_COST*3); + format %{ "CMP_w $p,$q\n\t" + "ADD_w $dst,$y,$x\n\t" + "CSEL_w $dst,$dst,$x,lt" %} + ins_encode %{ + __ cmp_w($p$$Register, $q$$constant); + __ add_w($dst$$Register, $y$$Register, $x$$Register); + __ csel_w($dst$$Register, $dst$$Register, $x$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); +%} +#else +// FIXME: remove unused "dst" +// ($dst appears in the operand list but not in the match rule, format or encoding.) +instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{ + match(Set z (AddI (AndI (CmpLTMask p q) y) z)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "CMP $p,$q\n\t" + "ADD.lt $z,$y,$z" %} + ins_encode %{ + __ cmp($p$$Register, $q$$constant); + __ add($z$$Register, $y$$Register, $z$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); +%} +#endif // !AARCH64 + +// Variant whose addend is (SubI p q): a single SUBS yields both p - q and the flags for the lt test. +#ifdef AARCH64 +instruct cadd_cmpLTMask( iRegI dst, iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{ + match(Set dst (AddI (AndI 
(CmpLTMask p q) y) (SubI p q))); + effect( TEMP dst, KILL ccr ); + size(12); + ins_cost(DEFAULT_COST*3); + format %{ "SUBS_w $p,$p,$q\n\t" + "ADD_w $dst,$y,$p\n\t" + "CSEL_w $dst,$dst,$p,lt" %} + ins_encode %{ + // NOTE(review): SUBS_w writes $p, which is an input operand here; effect() declares + // only TEMP dst and KILL ccr -- confirm that overwriting $p is safe. + __ subs_w($p$$Register, $p$$Register, $q$$Register); + __ add_w($dst$$Register, $y$$Register, $p$$Register); + __ csel_w($dst$$Register, $dst$$Register, $p$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); // FIXME +%} +#else +// Non-AARCH64 form: $p is both input and result (Set p ...), so updating it in place is declared. +instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{ + match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "SUBS $p,$p,$q\n\t" + "ADD.lt $p,$y,$p" %} + ins_encode %{ + __ subs($p$$Register, $p$$Register, $q$$Register); + __ add($p$$Register, $y$$Register, $p$$Register, lt); + %} + ins_pipe( cadd_cmpltmask ); +%} +#endif + +//----------Arithmetic Conversion Instructions--------------------------------- +// The conversion operations are all alphabetically sorted. Please keep it that way! + +// Convert double to float +instruct convD2F_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + size(4); + format %{ "FCVTSD $dst,$src" %} + ins_encode %{ + __ convert_d2f($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fcvtD2F); +%} + +// Convert a double to an int in a float register. +// If the double is a NAN, stuff a zero in instead. 
+ +#ifdef AARCH64 +// AARCH64: double to int with a single FCVTZS into the integer register. +// NOTE(review): no size() attribute here, unlike the AARCH64 convF2I_reg_reg -- confirm this is intentional +instruct convD2I_reg_reg(iRegI dst, regD src) %{ + match(Set dst (ConvD2I src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + format %{ "FCVTZS_wd $dst, $src" %} + ins_encode %{ + __ fcvtzs_wd($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(fcvtD2I); +%} + +// AARCH64: double to long with a single FCVTZS into the integer register. +instruct convD2L_reg_reg(iRegL dst, regD src) %{ + match(Set dst (ConvD2L src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + format %{ "FCVTZS_xd $dst, $src" %} + ins_encode %{ + __ fcvtzs_xd($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(fcvtD2L); +%} +#else +// Non-AARCH64: convert into the float temp $tmp (FTOSIZD), then move +// the result to the integer register (FMRS). +instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{ + match(Set dst (ConvD2I src)); + effect( TEMP tmp ); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + format %{ "FTOSIZD $tmp,$src\n\t" + "FMRS $dst, $tmp" %} + ins_encode %{ + __ ftosizd($tmp$$FloatRegister, $src$$FloatRegister); + __ fmrs($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(fcvtD2I); +%} +#endif + +// Convert a double to a long in a double register. +// If the double is a NAN, stuff a zero in instead. + +#ifndef AARCH64 +// Double to Long conversion +// Implemented as a runtime call to SharedRuntime::d2l (effect(CALL)); +// the result operand is of class R0R1RegL. +instruct convD2L_reg(R0R1RegL dst, regD src) %{ + match(Set dst (ConvD2L src)); + effect(CALL); + ins_cost(MEMORY_REF_COST); // FIXME + format %{ "convD2L $dst,$src\t ! 
call to SharedRuntime::d2l" %} + ins_encode %{ + // Argument set-up: without __ABI_HARD__ the double is moved into the $dst register pair; + // with it, the double is moved to D0 unless it is already there. +#ifndef __ABI_HARD__ + __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister); +#else + if ($src$$FloatRegister != D0) { + __ mov_double(D0, $src$$FloatRegister); + } +#endif + address target = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(fcvtD2L); +%} +#endif + +// Convert float to double +instruct convF2D_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + size(4); + format %{ "FCVTDS $dst,$src" %} + ins_encode %{ + __ convert_f2d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fcvtF2D); +%} + +#ifdef AARCH64 +// AARCH64: float to int with a single FCVTZS into the integer register. +instruct convF2I_reg_reg(iRegI dst, regF src) %{ + match(Set dst (ConvF2I src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(4); + format %{ "FCVTZS_ws $dst, $src" %} + ins_encode %{ + __ fcvtzs_ws($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(fcvtF2I); +%} + +// AARCH64: float to long with a single FCVTZS into the integer register. +instruct convF2L_reg_reg(iRegL dst, regF src) %{ + match(Set dst (ConvF2L src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(4); + format %{ "FCVTZS_xs $dst, $src" %} + ins_encode %{ + __ fcvtzs_xs($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(fcvtF2L); +%} +#else +// Non-AARCH64: convert into the float temp $tmp (FTOSIZS), then move +// the result to the integer register (FMRS). +instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{ + match(Set dst (ConvF2I src)); + effect( TEMP tmp ); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(8); + format %{ "FTOSIZS $tmp,$src\n\t" + "FMRS $dst, $tmp" %} + ins_encode %{ + __ ftosizs($tmp$$FloatRegister, $src$$FloatRegister); + __ fmrs($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(fcvtF2I); +%} + +// Float to Long conversion +// Implemented as a runtime call to SharedRuntime::f2l (effect(CALL)). +instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{ + match(Set dst (ConvF2L src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + effect(CALL); + format %{ "convF2L $dst,$src\t! 
call to SharedRuntime::f2l" %} + ins_encode %{ + // Argument set-up: without __ABI_HARD__ the float is moved into $arg1; + // with it, the float is moved to S0 unless it is already there. +#ifndef __ABI_HARD__ + __ fmrs($arg1$$Register, $src$$FloatRegister); +#else + if($src$$FloatRegister != S0) { + __ mov_float(S0, $src$$FloatRegister); + } +#endif + address target = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(fcvtF2L); +%} +#endif + +#ifdef AARCH64 +// AARCH64: int to double with a single SCVTF. +instruct convI2D_reg_reg(iRegI src, regD dst) %{ + match(Set dst (ConvI2D src)); + ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME + size(4); + format %{ "SCVTF_dw $dst,$src" %} + ins_encode %{ + __ scvtf_dw($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(fcvtI2D); +%} +#else +// Non-AARCH64: move the int into $dst (FMSR), then convert in place (FSITOD). +instruct convI2D_reg_reg(iRegI src, regD_low dst) %{ + match(Set dst (ConvI2D src)); + ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME + size(8); + format %{ "FMSR $dst,$src \n\t" + "FSITOD $dst, $dst"%} + ins_encode %{ + __ fmsr($dst$$FloatRegister, $src$$Register); + __ fsitod($dst$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe(fcvtI2D); +%} +#endif + +// Int to float: single SCVTF on AARCH64; otherwise FMSR into $dst followed by an in-place FSITOS. +instruct convI2F_reg_reg( regF dst, iRegI src ) %{ + match(Set dst (ConvI2F src)); + ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME +#ifdef AARCH64 + size(4); + format %{ "SCVTF_sw $dst,$src" %} + ins_encode %{ + __ scvtf_sw($dst$$FloatRegister, $src$$Register); + %} +#else + size(8); + format %{ "FMSR $dst,$src \n\t" + "FSITOS $dst, $dst"%} + ins_encode %{ + __ fmsr($dst$$FloatRegister, $src$$Register); + __ fsitos($dst$$FloatRegister, $dst$$FloatRegister); + %} +#endif + ins_pipe(fcvtI2F); +%} + +// Int to long: SXTW on AARCH64; otherwise the low word is copied and the +// high word is $src shifted right arithmetically by 31. +instruct convI2L_reg(iRegL dst, iRegI src) %{ + match(Set dst (ConvI2L src)); +#ifdef AARCH64 + size(4); + format %{ "SXTW $dst,$src\t! int->long" %} + ins_encode %{ + __ sxtw($dst$$Register, $src$$Register); + %} +#else + size(8); + format %{ "MOV $dst.lo, $src \n\t" + "ASR $dst.hi,$src,31\t! 
int->long" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), AsmOperand($src$$Register, asr, 31)); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// Zero-extend convert int to long +// Matches (ConvI2L src) ANDed with an immL_32bits mask; the non-AARCH64 path +// copies the low word and sets the high word to 0. +instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{ + match(Set dst (AndL (ConvI2L src) mask) ); +#ifdef AARCH64 + size(4); + format %{ "mov_w $dst,$src\t! zero-extend int to long" %} + ins_encode %{ + __ mov_w($dst$$Register, $src$$Register); + %} +#else + size(8); + format %{ "MOV $dst.lo,$src.lo\t! zero-extend int to long\n\t" + "MOV $dst.hi, 0"%} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// Zero-extend long +// Matches a long ANDed with an immL_32bits mask; the non-AARCH64 path +// copies the low word and sets the high word to 0. +instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{ + match(Set dst (AndL src mask) ); +#ifdef AARCH64 + size(4); + format %{ "mov_w $dst,$src\t! zero-extend long" %} + ins_encode %{ + __ mov_w($dst$$Register, $src$$Register); + %} +#else + size(8); + format %{ "MOV $dst.lo,$src.lo\t! zero-extend long\n\t" + "MOV $dst.hi, 0"%} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), 0); + %} +#endif + ins_pipe(ialu_reg_reg); +%} + +// MoveF2I: transfer a float register to an integer register with FMRS. +instruct MoveF2I_reg_reg(iRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMRS $dst,$src\t! MoveF2I" %} + ins_encode %{ + __ fmrs($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(iload_mem); // FIXME +%} + +// MoveI2F: transfer an integer register to a float register with FMSR. +instruct MoveI2F_reg_reg(regF dst, iRegI src) %{ + match(Set dst (MoveI2F src)); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMSR $dst,$src\t! 
MoveI2F" %} + ins_encode %{ + __ fmsr($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(iload_mem); // FIXME +%} + +// MoveD2L: transfer a double register to a long. FMOV on AARCH64; otherwise +// FMRRD into $dst$$Register and its successor(). +instruct MoveD2L_reg_reg(iRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); +#ifdef AARCH64 + format %{ "FMOV_xd $dst,$src\t! MoveD2L" %} + ins_encode %{ + __ fmov_xd($dst$$Register, $src$$FloatRegister); + %} +#else + format %{ "FMRRD $dst,$src\t! MoveD2L" %} + ins_encode %{ + __ fmrrd($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister); + %} +#endif + ins_pipe(iload_mem); // FIXME +%} + +// MoveL2D: transfer a long to a double register. FMOV on AARCH64; otherwise +// FMDRR from $src$$Register and its successor(). +instruct MoveL2D_reg_reg(regD dst, iRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); +#ifdef AARCH64 + format %{ "FMOV_dx $dst,$src\t! MoveL2D" %} + ins_encode %{ + __ fmov_dx($dst$$FloatRegister, $src$$Register); + %} +#else + format %{ "FMDRR $dst,$src\t! MoveL2D" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + %} +#endif + ins_pipe(ialu_reg_reg); // FIXME +%} + +//----------- +// Long to Double conversion + +#ifdef AARCH64 +// AARCH64: long to double with a single SCVTF. +instruct convL2D(regD dst, iRegL src) %{ + match(Set dst (ConvL2D src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(4); + format %{ "SCVTF_dx $dst, $src" %} + ins_encode %{ + __ scvtf_dx($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(fcvtL2D); +%} + +// AARCH64: long to float with a single SCVTF. +instruct convL2F(regF dst, iRegL src) %{ + match(Set dst (ConvL2F src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(4); + format %{ "SCVTF_sx $dst, $src" %} + ins_encode %{ + __ scvtf_sx($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(fcvtL2F); +%} +#else +// Magic constant, 0x43300000 +// No match rule: this rule is referenced from the expand of convL2D_reg_slow_fxtof. +instruct loadConI_x43300000(iRegI dst) %{ + effect(DEF dst); + size(8); + format %{ "MOV_SLOW $dst,0x43300000\t! 
2^52" %} + ins_encode %{ + __ mov_slow($dst$$Register, 0x43300000); + %} + ins_pipe(ialu_none); +%} + +// Magic constant, 0x41f00000 +// No match rule: this rule is referenced from the expand of convL2D_reg_slow_fxtof. +instruct loadConI_x41f00000(iRegI dst) %{ + effect(DEF dst); + size(8); + format %{ "MOV_SLOW $dst, 0x41f00000\t! 2^32" %} + ins_encode %{ + __ mov_slow($dst$$Register, 0x41f00000); + %} + ins_pipe(ialu_none); +%} + +// Constant zero (no match rule; referenced from the expand of convL2D_reg_slow_fxtof) +instruct loadConI_x0(iRegI dst) %{ + effect(DEF dst); + size(4); + format %{ "MOV $dst, 0x0\t! 0" %} + ins_encode %{ + __ mov($dst$$Register, 0); + %} + ins_pipe(ialu_none); +%} + +// Construct a double from two float halves +// The successor() (high) half is copied from $src1, the first (low) half from $src2. +instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{ + effect(DEF dst, USE src1, USE src2); + size(8); + format %{ "FCPYS $dst.hi,$src1.hi\n\t" + "FCPYS $dst.lo,$src2.lo" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister->successor(), $src1$$FloatRegister->successor()); + __ fcpys($dst$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// NOTE(review): this guard sits inside the #else branch of an enclosing #ifdef AARCH64, so it looks redundant -- confirm +#ifndef AARCH64 +// Convert integer in high half of a double register (in the lower half of +// the double register file) to double +instruct convI2D_regDHi_regD(regD dst, regD_low src) %{ + effect(DEF dst, USE src); + size(4); + format %{ "FSITOD $dst,$src" %} + ins_encode %{ + __ fsitod($dst$$FloatRegister, $src$$FloatRegister->successor()); + %} + ins_pipe(fcvtLHi2D); +%} +#endif + +// Add float double precision +// (no match rule; referenced from the expand of convL2D_reg_slow_fxtof) +instruct addD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FADDD $dst,$src1,$src2" %} + ins_encode %{ + __ add_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Sub float double precision +// (no match rule; referenced from the expand of convL2D_reg_slow_fxtof) +instruct subD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FSUBD $dst,$src1,$src2" %} + ins_encode %{ + __ sub_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Mul 
float double precision +instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FMULD $dst,$src1,$src2" %} + ins_encode %{ + __ mul_double($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(fmulD_reg_reg); +%} + +instruct regL_to_regD(regD dst, iRegL src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FMDRR $dst,$src\t! regL to regD" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} + +instruct regI_regI_to_regD(regD dst, iRegI src1, iRegI src2) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src1, USE src2); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FMDRR $dst,$src1,$src2\t! regI,regI to regD" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} + +instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{ + match(Set dst (ConvL2D src)); + ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6); // FIXME + + expand %{ + regD_low tmpsrc; + iRegI ix43300000; + iRegI ix41f00000; + iRegI ix0; + regD_low dx43300000; + regD dx41f00000; + regD tmp1; + regD_low tmp2; + regD tmp3; + regD tmp4; + + regL_to_regD(tmpsrc, src); + + loadConI_x43300000(ix43300000); + loadConI_x41f00000(ix41f00000); + loadConI_x0(ix0); + + regI_regI_to_regD(dx43300000, ix0, ix43300000); + regI_regI_to_regD(dx41f00000, ix0, ix41f00000); + + convI2D_regDHi_regD(tmp1, tmpsrc); + regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc); + subD_regD_regD(tmp3, tmp2, dx43300000); + mulD_regD_regD(tmp4, tmp1, dx41f00000); + addD_regD_regD(dst, tmp3, tmp4); + %} +%} +#endif // !AARCH64 + +instruct convL2I_reg(iRegI dst, iRegL src) %{ + match(Set dst (ConvL2I src)); + size(4); +#ifdef AARCH64 + format %{ "MOV_w $dst,$src\t! 
long->int" %} + ins_encode %{ + __ mov_w($dst$$Register, $src$$Register); + %} +#else + format %{ "MOV $dst,$src.lo\t! long->int" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + %} +#endif + ins_pipe(ialu_move_reg_I_to_L); +%} + +#ifndef AARCH64 +// Register Shift Right Immediate +instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{ + match(Set dst (ConvL2I (RShiftL src cnt))); + size(4); + format %{ "ASR $dst,$src.hi,($cnt - 32)\t! long->int or mov if $cnt==32" %} + ins_encode %{ + if ($cnt$$constant == 32) { + __ mov($dst$$Register, $src$$Register->successor()); + } else { + __ mov($dst$$Register, AsmOperand($src$$Register->successor(), asr, $cnt$$constant - 32)); + } + %} + ins_pipe(ialu_reg_imm); +%} +#endif + + +//----------Control Flow Instructions------------------------------------------ +// Compare Instructions +// Compare Integers +instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1, USE op2 ); + + size(4); + format %{ "cmp_32 $op1,$op2\t! int" %} + ins_encode %{ + __ cmp_32($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +#ifdef _LP64 +// Compare compressed pointers +instruct compN_reg2(flagsRegU icc, iRegN op1, iRegN op2) %{ + match(Set icc (CmpN op1 op2)); + effect( DEF icc, USE op1, USE op2 ); + + size(4); + format %{ "cmp_32 $op1,$op2\t! int" %} + ins_encode %{ + __ cmp_32($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} +#endif + +instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{ + match(Set icc (CmpU op1 op2)); + + size(4); + format %{ "cmp_32 $op1,$op2\t! unsigned int" %} + ins_encode %{ + __ cmp_32($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compI_iReg_immneg(flagsReg icc, iRegI op1, aimmIneg op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1 ); + + size(4); + format %{ "cmn_32 $op1,-$op2\t! 
int" %} + ins_encode %{ + __ cmn_32($op1$$Register, -$op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +instruct compI_iReg_imm(flagsReg icc, iRegI op1, aimmI op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1 ); + + size(4); + format %{ "cmp_32 $op1,$op2\t! int" %} + ins_encode %{ + __ cmp_32($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 op2) zero)); + size(4); + format %{ "tst_32 $op2,$op1" %} + + ins_encode %{ + __ tst_32($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +#ifndef AARCH64 +instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, AsmOperand($op2$$Register, lsl, $op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} +#endif + +instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); + size(4); + format %{ "tst_32 $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst_32($op1$$Register, AsmOperand($op2$$Register, lsl, $op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +#ifndef AARCH64 +instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, AsmOperand($op2$$Register, asr, $op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} +#endif + +instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); + size(4); + format %{ "tst_32 
$op2,$op1<<$op3" %} + + ins_encode %{ + __ tst_32($op1$$Register, AsmOperand($op2$$Register, asr, $op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +#ifndef AARCH64 +instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, AsmOperand($op2$$Register, lsr, $op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} +#endif + +instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); + size(4); + format %{ "tst_32 $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst_32($op1$$Register, AsmOperand($op2$$Register, lsr, $op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 op2) zero)); + size(4); + format %{ "tst_32 $op2,$op1" %} + + ins_encode %{ + __ tst_32($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm_zero); +%} + +#ifdef AARCH64 +instruct compL_reg_reg(flagsReg xcc, iRegL op1, iRegL op2) +%{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2 ); + + size(4); + format %{ "CMP $op1,$op2\t! long" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} +#else +instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegL tmp) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2, TEMP tmp ); + + size(8); + format %{ "SUBS $tmp,$op1.low,$op2.low\t\t! 
long\n\t" + "SBCS $tmp,$op1.hi,$op2.hi" %} + ins_encode %{ + __ subs($tmp$$Register, $op1$$Register, $op2$$Register); + __ sbcs($tmp$$Register->successor(), $op1$$Register->successor(), $op2$$Register->successor()); + %} + ins_pipe(ialu_cconly_reg_reg); +%} +#endif + +#ifdef AARCH64 +instruct compL_reg_con(flagsReg xcc, iRegL op1, aimmL con) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con ); + + size(8); + format %{ "CMP $op1,$con\t\t! long" %} + ins_encode %{ + __ cmp($op1$$Register, $con$$constant); + %} + + ins_pipe(ialu_cconly_reg_imm); +%} +#else +instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2 ); + + size(8); + format %{ "TEQ $op1.hi,$op2.hi\t\t! long\n\t" + "TEQ.eq $op1.lo,$op2.lo" %} + ins_encode %{ + __ teq($op1$$Register->successor(), $op2$$Register->successor()); + __ teq($op1$$Register, $op2$$Register, eq); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compL_reg_reg_LEGT(flagsRegL_LEGT xcc, iRegL op1, iRegL op2, iRegL tmp) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2, TEMP tmp ); + + size(8); + format %{ "SUBS $tmp,$op2.low,$op1.low\t\t! long\n\t" + "SBCS $tmp,$op2.hi,$op1.hi" %} + ins_encode %{ + __ subs($tmp$$Register, $op2$$Register, $op1$$Register); + __ sbcs($tmp$$Register->successor(), $op2$$Register->successor(), $op1$$Register->successor()); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_LTGE(flagsRegL_LTGE xcc, iRegL op1, immLlowRot con, iRegL tmp) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con, TEMP tmp ); + + size(8); + format %{ "SUBS $tmp,$op1.low,$con\t\t! 
long\n\t" + "SBCS $tmp,$op1.hi,0" %} + ins_encode %{ + __ subs($tmp$$Register, $op1$$Register, $con$$constant); + __ sbcs($tmp$$Register->successor(), $op1$$Register->successor(), 0); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_EQNE(flagsRegL_EQNE xcc, iRegL op1, immLlowRot con) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con ); + + size(8); + format %{ "TEQ $op1.hi,0\t\t! long\n\t" + "TEQ.eq $op1.lo,$con" %} + ins_encode %{ + __ teq($op1$$Register->successor(), 0); + __ teq($op1$$Register, $con$$constant, eq); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_LEGT(flagsRegL_LEGT xcc, iRegL op1, immLlowRot con, iRegL tmp) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con, TEMP tmp ); + + size(8); + format %{ "RSBS $tmp,$op1.low,$con\t\t! 
long\n\t" + "RSCS $tmp,$op1.hi,0" %} + ins_encode %{ + __ rsbs($tmp$$Register, $op1$$Register, $con$$constant); + __ rscs($tmp$$Register->successor(), $op1$$Register->successor(), 0); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} +#endif + +/* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */ +/* match(Set xcc (CmpL (AndL op1 op2) zero)); */ +/* ins_encode %{ */ +/* __ stop("testL_reg_reg unimplemented"); */ +/* %} */ +/* ins_pipe(ialu_cconly_reg_reg); */ +/* %} */ + +/* // useful for checking the alignment of a pointer: */ +/* instruct testL_reg_con(flagsRegL xcc, iRegL op1, immLlowRot con, immL0 zero) %{ */ +/* match(Set xcc (CmpL (AndL op1 con) zero)); */ +/* ins_encode %{ */ +/* __ stop("testL_reg_con unimplemented"); */ +/* %} */ +/* ins_pipe(ialu_cconly_reg_reg); */ +/* %} */ + +instruct compU_iReg_imm(flagsRegU icc, iRegI op1, aimmU31 op2 ) %{ + match(Set icc (CmpU op1 op2)); + + size(4); + format %{ "cmp_32 $op1,$op2\t! unsigned" %} + ins_encode %{ + __ cmp_32($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +// Compare Pointers +instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{ + match(Set pcc (CmpP op1 op2)); + + size(4); + format %{ "CMP $op1,$op2\t! ptr" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compP_iRegP_imm(flagsRegP pcc, iRegP op1, aimmP op2 ) %{ + match(Set pcc (CmpP op1 op2)); + + size(4); + format %{ "CMP $op1,$op2\t! ptr" %} + ins_encode %{ + assert($op2$$constant == 0 || _opnds[2]->constant_reloc() == relocInfo::none, "reloc in cmp?"); + __ cmp($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +//----------Max and Min-------------------------------------------------------- +// Min Instructions +// Conditional move for min +instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{ + effect( USE_DEF op2, USE op1, USE icc ); + + size(4); + format %{ "MOV.lt $op2,$op1\t! 
min" %} + ins_encode %{ + __ mov($op2$$Register, $op1$$Register, lt); + %} + ins_pipe(ialu_reg_flags); +%} + +// Min Register with Register. +instruct minI_eReg(iRegI op1, iRegI op2) %{ + match(Set op2 (MinI op1 op2)); + ins_cost(DEFAULT_COST*2); + expand %{ + flagsReg icc; + compI_iReg(icc,op1,op2); + cmovI_reg_lt(op2,op1,icc); + %} +%} + +// Max Instructions +// Conditional move for max +instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{ + effect( USE_DEF op2, USE op1, USE icc ); + format %{ "MOV.gt $op2,$op1\t! max" %} + ins_encode %{ + __ mov($op2$$Register, $op1$$Register, gt); + %} + ins_pipe(ialu_reg_flags); +%} + +// Max Register with Register +instruct maxI_eReg(iRegI op1, iRegI op2) %{ + match(Set op2 (MaxI op1 op2)); + ins_cost(DEFAULT_COST*2); + expand %{ + flagsReg icc; + compI_iReg(icc,op1,op2); + cmovI_reg_gt(op2,op1,icc); + %} +%} + + +//----------Float Compares---------------------------------------------------- +// Compare floating, generate condition code +instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{ + match(Set icc (CmpF src1 src2)); + effect(KILL fcc); + +#ifdef AARCH64 + size(4); + format %{ "FCMP_s $src1,$src2" %} + ins_encode %{ + __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister); + %} +#else + size(8); + format %{ "FCMPs $src1,$src2\n\t" + "FMSTAT" %} + ins_encode %{ + __ fcmps($src1$$FloatRegister, $src2$$FloatRegister); + __ fmstat(); + %} +#endif + ins_pipe(faddF_fcc_reg_reg_zero); +%} + +instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{ + match(Set icc (CmpF src1 src2)); + effect(KILL fcc); + +#ifdef AARCH64 + size(4); + format %{ "FCMP0_s $src1" %} + ins_encode %{ + __ fcmp0_s($src1$$FloatRegister); + %} +#else + size(8); + format %{ "FCMPs $src1,$src2\n\t" + "FMSTAT" %} + ins_encode %{ + __ fcmpzs($src1$$FloatRegister); + __ fmstat(); + %} +#endif + ins_pipe(faddF_fcc_reg_reg_zero); +%} + +instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{ + match(Set 
icc (CmpD src1 src2)); + effect(KILL fcc); + +#ifdef AARCH64 + size(4); + format %{ "FCMP_d $src1,$src2" %} + ins_encode %{ + __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister); + %} +#else + size(8); + format %{ "FCMPd $src1,$src2 \n\t" + "FMSTAT" %} + ins_encode %{ + __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister); + __ fmstat(); + %} +#endif + ins_pipe(faddD_fcc_reg_reg_zero); +%} + +instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{ + match(Set icc (CmpD src1 src2)); + effect(KILL fcc); + +#ifdef AARCH64 + size(8); + format %{ "FCMP0_d $src1" %} + ins_encode %{ + __ fcmp0_d($src1$$FloatRegister); + %} +#else + size(8); + format %{ "FCMPZd $src1,$src2 \n\t" + "FMSTAT" %} + ins_encode %{ + __ fcmpzd($src1$$FloatRegister); + __ fmstat(); + %} +#endif + ins_pipe(faddD_fcc_reg_reg_zero); +%} + +#ifdef AARCH64 +// Compare floating, generate -1,0,1 +instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg icc) %{ + match(Set dst (CmpF3 src1 src2)); + // effect(KILL fcc); // nobody cares if flagsRegF is killed + effect(KILL icc); + ins_cost(DEFAULT_COST*3); // FIXME + size(12); + format %{ "FCMP_s $src1,$src2\n\t" + "CSET $dst, gt\n\t" + "CSINV $dst, $dst, ZR, ge" %} + ins_encode %{ + Register dst = $dst$$Register; + __ fcmp_s($src1$$FloatRegister, $src2$$FloatRegister); + __ cset(dst, gt); // 1 if '>', else 0 + __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 + %} + ins_pipe( floating_cmp ); // FIXME +%} + +// Compare floating, generate -1,0,1 +instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg icc) %{ + match(Set dst (CmpD3 src1 src2)); + // effect(KILL fcc); // nobody cares if flagsRegF is killed + effect(KILL icc); + ins_cost(DEFAULT_COST*3); // FIXME + size(12); + format %{ "FCMP_d $src1,$src2\n\t" + "CSET $dst, gt\n\t" + "CSINV $dst, $dst, ZR, ge" %} + ins_encode %{ + Register dst = $dst$$Register; + __ fcmp_d($src1$$FloatRegister, $src2$$FloatRegister); + __ cset(dst, gt); // 1 if '>', else 0 + __ 
csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 + %} + ins_pipe( floating_cmp ); // FIXME +%} + +// Compare floating, generate -1,0,1 +instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsReg icc) %{ + match(Set dst (CmpF3 src1 src2)); + // effect(KILL fcc); // nobody cares if flagsRegF is killed + effect(KILL icc); + ins_cost(DEFAULT_COST*3); // FIXME + size(12); + format %{ "FCMP0_s $src1\n\t" + "CSET $dst, gt\n\t" + "CSINV $dst, $dst, ZR, ge" %} + ins_encode %{ + Register dst = $dst$$Register; + __ fcmp0_s($src1$$FloatRegister); + __ cset(dst, gt); // 1 if '>', else 0 + __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 + %} + ins_pipe( floating_cmp ); // FIXME +%} + +// Compare floating, generate -1,0,1 +instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsReg icc) %{ + match(Set dst (CmpD3 src1 src2)); + // effect(KILL fcc); // nobody cares if flagsRegF is killed + effect(KILL icc); + ins_cost(DEFAULT_COST*3); // FIXME + size(12); + format %{ "FCMP0_d $src1\n\t" + "CSET $dst, gt\n\t" + "CSINV $dst, $dst, ZR, ge" %} + ins_encode %{ + Register dst = $dst$$Register; + __ fcmp0_d($src1$$FloatRegister); + __ cset(dst, gt); // 1 if '>', else 0 + __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 + %} + ins_pipe( floating_cmp ); // FIXME +%} +#else +// Compare floating, generate -1,0,1 +instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPs $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ fcmps($src1$$FloatRegister, $src2$$FloatRegister); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} + +instruct cmpF0_reg(iRegI dst, 
regF src1, immF0 src2, flagsRegF fcc) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPZs $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ fcmpzs($src1$$FloatRegister); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} + +instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF fcc) %{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPd $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ fcmpd($src1$$FloatRegister, $src2$$FloatRegister); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} + +instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPZd $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ fcmpzd($src1$$FloatRegister); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} +#endif // !AARCH64 + +//----------Branches--------------------------------------------------------- +// Jump +// (compare 'operand indIndex' and 'instruct addP_reg_reg' above) +// FIXME +instruct jumpXtnd(iRegX switch_val, 
iRegP tmp) %{ + match(Jump switch_val); + effect(TEMP tmp); + ins_cost(350); + format %{ "ADD $tmp, $constanttablebase, $switch_val\n\t" + "LDR $tmp,[$tmp + $constantoffset]\n\t" + "BX $tmp" %} + size(20); + ins_encode %{ + Register table_reg; + Register label_reg = $tmp$$Register; + if (constant_offset() == 0) { + table_reg = $constanttablebase; + __ ldr(label_reg, Address(table_reg, $switch_val$$Register)); + } else { + table_reg = $tmp$$Register; + int offset = $constantoffset; + if (is_memoryP(offset)) { + __ add(table_reg, $constanttablebase, $switch_val$$Register); + __ ldr(label_reg, Address(table_reg, offset)); + } else { + __ mov_slow(table_reg, $constantoffset); + __ add(table_reg, $constanttablebase, table_reg); + __ ldr(label_reg, Address(table_reg, $switch_val$$Register)); + } + } + __ jump(label_reg); // ldr + b better than ldr to PC for branch predictor? + // __ ldr(PC, Address($table$$Register, $switch_val$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +// // Direct Branch. 
+instruct branch(label labl) %{ + match(Goto); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B $labl" %} + ins_encode %{ + __ b(*($labl$$label)); + %} + ins_pipe(br); +%} + +// Conditional Direct Branch +instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +#ifdef ARM +instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} +#endif + +#ifdef AARCH64 +instruct cbzI(cmpOp cmp, iRegI op1, immI0 op2, label labl) %{ + match(If cmp (CmpI op1 op2)); + effect(USE labl); + predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); + size(4); + ins_cost(BRANCH_COST); + format %{ "CB{N}Z $op1, $labl\t! int $cmp" %} + ins_encode %{ + if ($cmp$$cmpcode == eq) { + __ cbz_w($op1$$Register, *($labl$$label)); + } else { + __ cbnz_w($op1$$Register, *($labl$$label)); + } + %} + ins_pipe(br_cc); // FIXME +%} + +instruct cbzP(cmpOpP cmp, iRegP op1, immP0 op2, label labl) %{ + match(If cmp (CmpP op1 op2)); + effect(USE labl); + predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); + size(4); + ins_cost(BRANCH_COST); + format %{ "CB{N}Z $op1, $labl\t! 
ptr $cmp" %} + ins_encode %{ + if ($cmp$$cmpcode == eq) { + __ cbz($op1$$Register, *($labl$$label)); + } else { + __ cbnz($op1$$Register, *($labl$$label)); + } + %} + ins_pipe(br_cc); // FIXME +%} + +instruct cbzL(cmpOpL cmp, iRegL op1, immL0 op2, label labl) %{ + match(If cmp (CmpL op1 op2)); + effect(USE labl); + predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); + size(4); + ins_cost(BRANCH_COST); + format %{ "CB{N}Z $op1, $labl\t! long $cmp" %} + ins_encode %{ + if ($cmp$$cmpcode == eq) { + __ cbz($op1$$Register, *($labl$$label)); + } else { + __ cbnz($op1$$Register, *($labl$$label)); + } + %} + ins_pipe(br_cc); // FIXME +%} +#endif + +instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{ + match(If cmp pcc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $pcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +#ifndef AARCH64 +instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConL_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + size(4); + ins_cost(BRANCH_COST); 
+ format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConL_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} +#endif + +instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{ + match(CountedLoopEnd cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl\t! Loop end" %} + ins_encode %{ + __ b(*($labl$$label), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +// instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{ +// match(CountedLoopEnd cmp icc); +// ins_pipe(br_cc); +// %} + +// ============================================================================ +// Long Compare +// +// Currently we hold longs in 2 registers. Comparing such values efficiently +// is tricky. The flavor of compare used depends on whether we are testing +// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. +// The GE test is the negated LT test. The LE test can be had by commuting +// the operands (yielding a GE test) and then negating; negate again for the +// GT test. The EQ test is done by ORcc'ing the high and low halves, and the +// NE test is negated from that. + +// Due to a shortcoming in the ADLC, it mixes up expressions like: +// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the +// difference between 'Y' and '0L'. The tree-matches for the CmpI sections +// are collapsed internally in the ADLC's dfa-gen code. 
The match for +// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the +// foo match ends up with the wrong leaf. One fix is to not match both +// reg-reg and reg-zero forms of long-compare. This is unfortunate because +// both forms beat the trinary form of long-compare and both are very useful +// on Intel which has so few registers. + +// instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{ +// match(If cmp xcc); +// ins_pipe(br_cc); +// %} + +// Manifest a CmpL3 result in an integer register. Very painful. +// This is the test to avoid. +#ifdef AARCH64 +instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr) %{ + match(Set dst (CmpL3 src1 src2)); + // effect(KILL fcc); // nobody cares if flagsRegF is killed + effect(KILL ccr); + ins_cost(DEFAULT_COST*3); // FIXME + size(12); + format %{ "CMP $src1,$src2\n\t" + "CSET $dst, gt\n\t" + "CSINV $dst, $dst, ZR, ge" %} + ins_encode %{ + Register dst = $dst$$Register; + __ cmp($src1$$Register, $src2$$Register); + __ cset(dst, gt); // 1 if '>', else 0 + __ csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 + %} + ins_pipe( ialu_cconly_reg_reg ); // FIXME +%} +// TODO cmpL3_reg_imm +#else +instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{ + match(Set dst (CmpL3 src1 src2) ); + effect( KILL ccr ); + ins_cost(6*DEFAULT_COST); // FIXME + size(32); + format %{ + "CMP $src1.hi, $src2.hi\t\t! 
long\n" + "\tMOV.gt $dst, 1\n" + "\tmvn.lt $dst, 0\n" + "\tB.ne done\n" + "\tSUBS $dst, $src1.lo, $src2.lo\n" + "\tMOV.hi $dst, 1\n" + "\tmvn.lo $dst, 0\n" + "done:" %} + ins_encode %{ + Label done; + __ cmp($src1$$Register->successor(), $src2$$Register->successor()); + __ mov($dst$$Register, 1, gt); + __ mvn($dst$$Register, 0, lt); + __ b(done, ne); + __ subs($dst$$Register, $src1$$Register, $src2$$Register); + __ mov($dst$$Register, 1, hi); + __ mvn($dst$$Register, 0, lo); + __ bind(done); + %} + ins_pipe(cmpL_reg); +%} +#endif + +#ifndef AARCH64 +// Conditional move +instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! 
long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! 
long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (AsmCondition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} +#endif // !AARCH64 + +#ifndef AARCH64 +instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + 
predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} +#endif // !AARCH64 + +#ifndef AARCH64 +instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + 
predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq 
|| _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw($dst$$Register, $src$$constant, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovFL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || 
_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ fcpys($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ fcpyd($dst$$FloatRegister, $src$$FloatRegister, (AsmCondition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} +#endif // !AARCH64 + +// ============================================================================ +// Safepoint Instruction +#ifdef AARCH64 +instruct 
safePoint_poll(iRegP poll, flagsReg icc, RtempRegP tmp) %{ + match(SafePoint poll); + // The handler stub kills Rtemp + effect(USE poll, KILL tmp, KILL icc); + + size(4); + format %{ "LDR ZR,[$poll]\t! Safepoint: poll for GC" %} + ins_encode %{ + __ relocate(relocInfo::poll_type); + __ ldr(ZR, Address($poll$$Register)); + %} + ins_pipe(loadPollP); +%} +#else +// Rather than KILL R12, it would be better to use any register as +// TEMP. That cannot be done at this point because it crashes the compiler. +instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{ + match(SafePoint poll); + effect(USE poll, KILL tmp, KILL icc); + + size(4); + format %{ "LDR $tmp,[$poll]\t! Safepoint: poll for GC" %} + ins_encode %{ + __ relocate(relocInfo::poll_type); + __ ldr($tmp$$Register, Address($poll$$Register)); + %} + ins_pipe(loadPollP); +%} +#endif + + +// ============================================================================ +// Call Instructions +// Call Java Static Instruction (calls that are not method handle invokes) +instruct CallStaticJavaDirect( method meth ) %{ + match(CallStaticJava); + predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke()); + effect(USE meth); + + ins_cost(CALL_COST); + format %{ "CALL,static ==> " %} + ins_encode( Java_Static_Call( meth ), call_epilog ); + ins_pipe(simple_call); +%} + +// Call Java Static Instruction (method handle version) +instruct CallStaticJavaHandle( method meth ) %{ + match(CallStaticJava); + predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); + effect(USE meth); + // FP is saved by all callees (for interpreter stack correction). + // We use it here for a similar purpose, in {preserve,restore}_SP. 
+ + ins_cost(CALL_COST); + format %{ "CALL,static/MethodHandle ==> " %} + ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog ); + ins_pipe(simple_call); +%} + +// Call Java Dynamic Instruction +instruct CallDynamicJavaDirect( method meth ) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(CALL_COST); + format %{ "MOV_OOP (empty),R_R8\n\t" + "CALL,dynamic ; NOP ==> " %} + ins_encode( Java_Dynamic_Call( meth ), call_epilog ); + ins_pipe(call); +%} + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime); + effect(USE meth); + ins_cost(CALL_COST); + format %{ "CALL,runtime" %} +#ifdef AARCH64 + ins_encode( save_last_PC, Java_To_Runtime( meth ), + call_epilog ); +#else + ins_encode( Java_To_Runtime( meth ), + call_epilog ); +#endif + ins_pipe(simple_call); +%} + +// Call runtime without safepoint - same as CallRuntime +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + ins_cost(CALL_COST); + format %{ "CALL,runtime leaf" %} + // TODO: need save_last_PC here? (CallRuntimeDirect emits it on AARCH64) + ins_encode( Java_To_Runtime( meth ), + call_epilog ); + ins_pipe(simple_call); +%} + +// Call runtime without safepoint - same as CallLeaf +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + ins_cost(CALL_COST); + format %{ "CALL,runtime leaf nofp" %} + // TODO: need save_last_PC here? (CallRuntimeDirect emits it on AARCH64) + ins_encode( Java_To_Runtime( meth ), + call_epilog ); + ins_pipe(simple_call); +%} + +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(IPRegP jump_target, inline_cache_regP method_oop) %{ + match(TailCall jump_target method_oop ); + + ins_cost(CALL_COST); + format %{ "MOV Rexception_pc, LR\n\t" + "jump $jump_target \t! 
$method_oop holds method oop" %} + ins_encode %{ + __ mov(Rexception_pc, LR); // this is used only to call + // StubRoutines::forward_exception_entry() + // which expects PC of exception in + // R5. FIXME? + __ jump($jump_target$$Register); + %} + ins_pipe(tail_call); +%} + + +// Return Instruction +instruct Ret() %{ + match(Return); + + format %{ "ret LR" %} + + ins_encode %{ + __ ret(LR); + %} + + ins_pipe(br); +%} + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is bound to the RExceptionRegP operand at the jump. +// NOTE(review): this comment used to name %o0/%i0 and a "restore" in Epilogue; +// nothing in this file matches those names - confirm the intended contract. +instruct tailjmpInd(IPRegP jump_target, RExceptionRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(CALL_COST); + format %{ "MOV Rexception_pc, LR\n\t" + "jump $jump_target \t! $ex_oop holds exc. oop" %} + ins_encode %{ + __ mov(Rexception_pc, LR); + __ jump($jump_target$$Register); + %} + ins_pipe(tail_call); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is set up +// just prior to jumping to this handler. No code emitted. +instruct CreateException( RExceptionRegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + ins_cost(0); + + size(0); + // use the following format syntax + format %{ "! exception oop is in Rexception_obj; no code emitted" %} + ins_encode(); + ins_pipe(empty); +%} + + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. 
+instruct RethrowException() +%{ + match(Rethrow); + ins_cost(CALL_COST); + + // use the following format syntax + format %{ "b rethrow_stub" %} + ins_encode %{ + Register scratch = R1_tmp; + assert_different_registers(scratch, c_rarg0, LR); + __ jump(OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type, scratch); + %} + ins_pipe(tail_call); +%} + + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(CALL_COST); + + size(4); + // Use the following format syntax + format %{ "breakpoint ; ShouldNotReachHere" %} + ins_encode %{ + __ breakpoint(); + %} + ins_pipe(tail_call); +%} + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// not zero for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP pcc, LRRegP lr ) %{ + match(Set index (PartialSubtypeCheck sub super)); + effect( KILL pcc, KILL lr ); + ins_cost(DEFAULT_COST*10); + format %{ "CALL PartialSubtypeCheck" %} + ins_encode %{ + __ call(StubRoutines::Arm::partial_subtype_check(), relocInfo::runtime_call_type); + %} + ins_pipe(partial_subtype_check_pipe); +%} + +/* instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ */ +/* match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); */ +/* ins_pipe(partial_subtype_check_pipe); */ +/* %} */ + + +// ============================================================================ +// inlined locking and unlocking + +#ifdef AARCH64 +instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) +#else +instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch 
) +#endif +%{ + match(Set pcc (FastLock object box)); + +#ifdef AARCH64 + effect(TEMP scratch, TEMP scratch2, TEMP scratch3); +#else + effect(TEMP scratch, TEMP scratch2); +#endif + ins_cost(100); + +#ifdef AARCH64 + format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); + %} +#else + format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register); + %} +#endif + ins_pipe(long_memory_op); +%} + + +#ifdef AARCH64 +instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch, iRegP scratch3 ) %{ + match(Set pcc (FastUnlock object box)); + effect(TEMP scratch, TEMP scratch2, TEMP scratch3); + ins_cost(100); + + format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $scratch3" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register, $scratch3$$Register); + %} + ins_pipe(long_memory_op); +%} +#else +instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{ + match(Set pcc (FastUnlock object box)); + effect(TEMP scratch, TEMP scratch2); + ins_cost(100); + + format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register); + %} + ins_pipe(long_memory_op); +%} +#endif + +#ifdef AARCH64 +// TODO: add version that takes immI cnt? +instruct clear_array(iRegX cnt, iRegP base, iRegP ptr, iRegX temp, Universe dummy, flagsReg cpsr) %{ + match(Set dummy (ClearArray cnt base)); + effect(TEMP temp, TEMP ptr, KILL cpsr); + ins_cost(300); + format %{ + " MOV $temp,$cnt\n" + " ADD $ptr,$base,$cnt\n" + " SUBS $temp,$temp,16\t! 
Count down dword pair in bytes\n" + " B.lt done16\n" + "loop: STP ZR,ZR,[$ptr,-16]!\n" + " SUBS $temp,$temp,16\t! Count down dword pair in bytes\n" + " B.ge loop\t! Clearing loop\n" + "done16: ADDS $temp,$temp,8\t! Room for 1 more long?\n" + " B.lt done\n" + " STR ZR,[$base+$temp]\n" + "done:" + %} + ins_encode %{ + // TODO: preload? + __ mov($temp$$Register, $cnt$$Register); + __ add($ptr$$Register, $base$$Register, $cnt$$Register); + Label loop, done, done16; + __ subs($temp$$Register, $temp$$Register, 16); + __ b(done16, lt); + __ bind(loop); + __ stp(ZR, ZR, Address($ptr$$Register, -16, pre_indexed)); + __ subs($temp$$Register, $temp$$Register, 16); + __ b(loop, ge); + __ bind(done16); + __ adds($temp$$Register, $temp$$Register, 8); + __ b(done, lt); + // $temp should be 0 here + __ str(ZR, Address($base$$Register, $temp$$Register)); + __ bind(done); + %} + ins_pipe(long_memory_op); +%} +#else +// Clear loop: temp starts at cnt and steps down by 4; $zero is stored at [base+temp] while temp >= 0. +// NOTE(review): an older note called cnt/base fixed registers, but they are generic iRegX/iRegP operands here. +instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{ + match(Set dummy (ClearArray cnt base)); + effect(TEMP temp, TEMP zero, KILL cpsr); + ins_cost(300); + format %{ "MOV $zero,0\n" + " MOV $temp,$cnt\n" + "loop: SUBS $temp,$temp,4\t! Count down a dword of bytes\n" + " STR.ge $zero,[$base+$temp]\t! delay slot\n" + " B.gt loop\t\t! Clearing loop\n" %} + ins_encode %{ + __ mov($zero$$Register, 0); + __ mov($temp$$Register, $cnt$$Register); + Label loop; + __ bind(loop); + __ subs($temp$$Register, $temp$$Register, 4); // sets the flags used by the two conditional instructions below + __ str($zero$$Register, Address($base$$Register, $temp$$Register), ge); // store only while the offset is still >= 0 + __ b(loop, gt); // offset 0 was the last store, so stop once temp reaches 0 + %} + ins_pipe(long_memory_op); +%} +#endif + +#ifdef XXX +// FIXME: Why R0/R1/R2/R3? 
+instruct string_compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(!CompactStrings); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, TEMP tmp1, TEMP tmp2); + ins_cost(300); + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // TEMP $tmp1, $tmp2" %} + ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2) ); + + ins_pipe(long_memory_op); +%} + +// FIXME: Why R0/R1/R2? +instruct string_equals(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, iRegI tmp2, + flagsReg ccr) %{ + predicate(!CompactStrings); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Equals $str1,$str2,$cnt -> $result // TEMP $tmp1, $tmp2" %} + ins_encode( enc_String_Equals(str1, str2, cnt, result, tmp1, tmp2) ); + ins_pipe(long_memory_op); +%} + +// FIXME: Why R0/R1? 
+instruct array_equals(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI result, + flagsReg ccr) %{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result // TEMP $tmp1,$tmp2,$tmp3" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, result)); + ins_pipe(long_memory_op); +%} +#endif + +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(iRegI dst, iRegI src) %{ + match(Set dst (CountLeadingZerosI src)); + size(4); + format %{ "CLZ_32 $dst,$src" %} + ins_encode %{ + __ clz_32($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +#ifdef AARCH64 +instruct countLeadingZerosL(iRegI dst, iRegL src) %{ + match(Set dst (CountLeadingZerosL src)); + size(4); + format %{ "CLZ $dst,$src" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} +#else +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ + match(Set dst (CountLeadingZerosL src)); + effect(TEMP tmp, TEMP dst, KILL ccr); + size(16); + format %{ "CLZ $dst,$src.hi\n\t" + "TEQ $dst,32\n\t" + "CLZ.eq $tmp,$src.lo\n\t" + "ADD.eq $dst, $dst, $tmp\n\t" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register->successor()); + __ teq($dst$$Register, 32); + __ clz($tmp$$Register, $src$$Register, eq); + __ add($dst$$Register, $dst$$Register, $tmp$$Register, eq); + %} + ins_pipe(ialu_reg); +%} +#endif + +instruct countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{ + match(Set dst (CountTrailingZerosI src)); + effect(TEMP tmp); + size(8); + format %{ "RBIT_32 $tmp, $src\n\t" + "CLZ_32 $dst,$tmp" %} + ins_encode %{ + __ rbit_32($tmp$$Register, $src$$Register); + __ clz_32($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} + +#ifdef 
AARCH64 +instruct countTrailingZerosL(iRegI dst, iRegL src, iRegL tmp) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP tmp); + size(8); + format %{ "RBIT $tmp, $src\n\t" + "CLZ $dst,$tmp" %} + ins_encode %{ + __ rbit($tmp$$Register, $src$$Register); + __ clz($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} +#else +instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP tmp, TEMP dst, KILL ccr); + size(24); + format %{ "RBIT $tmp,$src.lo\n\t" + "CLZ $dst,$tmp\n\t" + "TEQ $dst,32\n\t" + "RBIT $tmp,$src.hi\n\t" + "CLZ.eq $tmp,$tmp\n\t" + "ADD.eq $dst,$dst,$tmp\n\t" %} + ins_encode %{ + __ rbit($tmp$$Register, $src$$Register); + __ clz($dst$$Register, $tmp$$Register); + __ teq($dst$$Register, 32); + __ rbit($tmp$$Register, $src$$Register->successor()); + __ clz($tmp$$Register, $tmp$$Register, eq); + __ add($dst$$Register, $dst$$Register, $tmp$$Register, eq); + %} + ins_pipe(ialu_reg); +%} +#endif + + +//---------- Population Count Instructions ------------------------------------- + +#ifdef AARCH64 +instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + size(20); + + format %{ "MOV_W $dst,$src\n\t" + "FMOV_dx $tmp,$dst\n\t" + "VCNT $tmp.8B,$tmp.8B\n\t" + "ADDV $tmp.B,$tmp.8B\n\t" + "FMRS $dst,$tmp" %} + + ins_encode %{ + __ mov_w($dst$$Register, $src$$Register); + __ fmov_dx($tmp$$FloatRegister, $dst$$Register); + int quad = 0; + int cnt_size = 0; // VELEM_SIZE_8 + __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size); + int add_size = 0; // VELEM_SIZE_8 + __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size); + __ fmrs($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} +#else +instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP 
tmp); + + format %{ "FMSR $tmp,$src\n\t" + "VCNT.8 $tmp,$tmp\n\t" + "VPADDL.U8 $tmp,$tmp\n\t" + "VPADDL.U16 $tmp,$tmp\n\t" + "FMRS $dst,$tmp" %} + size(20); + + ins_encode %{ + __ fmsr($tmp$$FloatRegister, $src$$Register); + __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 8, 0); + __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 16, 0); + __ fmrs($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif + +#ifdef AARCH64 +instruct popCountL(iRegI dst, iRegL src, regD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + size(16); + + format %{ "FMOV_dx $tmp,$src\n\t" + "VCNT $tmp.8B,$tmp.8B\n\t" + "ADDV $tmp.B,$tmp.8B\n\t" + "FMOV_ws $dst,$tmp" %} + + ins_encode %{ + __ fmov_dx($tmp$$FloatRegister, $src$$Register); + int quad = 0; + int cnt_size = 0; + __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister, quad, cnt_size); + int add_size = 0; + __ addv($tmp$$FloatRegister, $tmp$$FloatRegister, quad, add_size); + __ fmov_ws($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} +#else +// Note: Long.bitCount(long) returns an int. 
+instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + + format %{ "FMDRR $tmp,$src.lo,$src.hi\n\t" + "VCNT.8 $tmp,$tmp\n\t" + "VPADDL.U8 $tmp,$tmp\n\t" + "VPADDL.U16 $tmp,$tmp\n\t" + "VPADDL.U32 $tmp,$tmp\n\t" + "FMRS $dst,$tmp" %} + + size(32); // NOTE(review): six emits follow; sibling blocks declare 4 bytes per emit (6 -> 24) - confirm 32 is intended + + ins_encode %{ + __ fmdrr($tmp$$FloatRegister, $src$$Register, $src$$Register->successor()); + __ vcnt($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 8, 0); + __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 16, 0); + __ vpaddl($tmp$$FloatRegister, $tmp$$FloatRegister, 32, 0); + __ fmrs($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); +%} +#endif + + +// ============================================================================ +//------------Bytes reverse-------------------------------------------------- + +instruct bytes_reverse_int(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesI src)); + + size(4); + format %{ "REV32 $dst,$src" %} + ins_encode %{ +#ifdef AARCH64 + __ rev_w($dst$$Register, $src$$Register); + // high 32 bits zeroed, not sign extended +#else + __ rev($dst$$Register, $src$$Register); +#endif + %} + ins_pipe( iload_mem ); // FIXME +%} + +instruct bytes_reverse_long(iRegL dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); +#ifdef AARCH64 +//size(4); + format %{ "REV $dst,$src" %} + ins_encode %{ + __ rev($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +#else + effect(TEMP dst); // presumably keeps dst off src's registers: dst.lo is written before src.lo is read - confirm + size(8); + format %{ "REV $dst.lo,$src.hi\n\t" + "REV $dst.hi,$src.lo" %} + ins_encode %{ + __ rev($dst$$Register, $src$$Register->successor()); // dst.lo = byte-reversed src.hi + __ rev($dst$$Register->successor(), $src$$Register); // dst.hi = byte-reversed src.lo + %} + ins_pipe( iload_mem ); // FIXME +#endif +%} + +instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesUS src)); +#ifdef AARCH64 + size(4); + format %{ "REV16_W $dst,$src" %} + ins_encode 
%{ + __ rev16_w($dst$$Register, $src$$Register); + // high 32 bits zeroed + %} +#else + size(4); + format %{ "REV16 $dst,$src" %} + ins_encode %{ + __ rev16($dst$$Register, $src$$Register); + %} +#endif + ins_pipe( iload_mem ); // FIXME +%} + +instruct bytes_reverse_short(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesS src)); +#ifdef AARCH64 + size(8); + format %{ "REV16_W $dst,$src\n\t" + "SIGN_EXT16 $dst" %} + ins_encode %{ + __ rev16_w($dst$$Register, $src$$Register); + __ sign_extend($dst$$Register, $dst$$Register, 16); + %} +#else + size(4); + format %{ "REVSH $dst,$src" %} + ins_encode %{ + __ revsh($dst$$Register, $src$$Register); + %} +#endif + ins_pipe( iload_mem ); // FIXME +%} + + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load Aligned Packed values into a Double Register +instruct loadV8(vecD dst, memoryD mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FLDD $mem,$dst\t! load vector (8 bytes)" %} + ins_encode %{ + __ ldr_double($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadD_mem); +%} + +// Load Aligned Packed values into a Double Register Pair +instruct loadV16(vecX dst, memoryvld mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "VLD1 $mem,$dst.Q\t! load vector (16 bytes)" %} + ins_encode %{ + __ vld1($dst$$FloatRegister, $mem$$Address, MacroAssembler::VELEM_SIZE_16, 128); + %} + ins_pipe(floadD_mem); // FIXME +%} + +// Store Vector in Double register to memory +instruct storeV8(memoryD mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FSTD $src,$mem\t! 
store vector (8 bytes)" %} + ins_encode %{ + __ str_double($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +// Store Vector in Double Register Pair to memory +instruct storeV16(memoryvld mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "VST1 $src,$mem\t! store vector (16 bytes)" %} + ins_encode %{ + __ vst1($src$$FloatRegister, $mem$$Address, MacroAssembler::VELEM_SIZE_16, 128); + %} + ins_pipe(fstoreD_mem_reg); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(DEFAULT_COST*4); + effect(TEMP tmp); + size(16); + + // FIXME: could use PKH instruction instead? + format %{ "LSL $tmp, $src, 24 \n\t" + "OR $tmp, $tmp, ($tmp >> 8) \n\t" + "OR $tmp, $tmp, ($tmp >> 16) \n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode %{ + __ mov($tmp$$Register, AsmOperand($src$$Register, lsl, 24)); + __ orr($tmp$$Register, $tmp$$Register, AsmOperand($tmp$$Register, lsr, 8)); + __ orr($tmp$$Register, $tmp$$Register, AsmOperand($tmp$$Register, lsr, 16)); + __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VDUP.8 $dst,$src\t" %} + ins_encode %{ + bool quad = false; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register pair +instruct Repl16B_reg(vecX dst, iRegI src) %{ + 
predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VDUP.8 $dst.Q,$src\t" %} + ins_encode %{ + bool quad = true; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar constant to packed byte values in Double register +instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(DEFAULT_COST*2); + effect(TEMP tmp); + size(12); + + format %{ "MOV $tmp, Repl4($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) ); + ins_pipe(loadConFD); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar constant to packed byte values in Double register +// TODO: support negative constants with MVNI? +instruct Repl8B_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VMOV.U8 $dst,$src" %} + ins_encode %{ + bool quad = false; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register pair +instruct Repl16B_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VMOV.U8 $dst.Q,$src" %} + ins_encode %{ + bool quad = true; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar to packed short/char values into Double register +instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(DEFAULT_COST*3); + effect(TEMP tmp); + 
size(12); + + // FIXME: could use PKH instruction instead? + format %{ "LSL $tmp, $src, 16 \n\t" + "OR $tmp, $tmp, ($tmp >> 16) \n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode %{ + __ mov($tmp$$Register, AsmOperand($src$$Register, lsl, 16)); + __ orr($tmp$$Register, $tmp$$Register, AsmOperand($tmp$$Register, lsr, 16)); + __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed short/char values in Double register +instruct Repl4S_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VDUP.16 $dst,$src\t" %} + ins_encode %{ + bool quad = false; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed short/char values in Double register pair +instruct Repl8S_reg(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VDUP.16 $dst.Q,$src\t" %} + ins_encode %{ + bool quad = true; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +#ifndef AARCH64 +// Replicate scalar constant to packed short/char values in Double register +instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl2($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) ); + ins_pipe(loadConFD); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar constant to packed short/char values in Double register +instruct Repl4S_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && 
VM_Version::has_simd()); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VMOV.U16 $dst,$src" %} + ins_encode %{ + bool quad = false; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed short/char values in Double register pair +instruct Repl8S_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VMOV.U16 $dst.Q,$src" %} + ins_encode %{ + bool quad = true; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar to packed int values in Double register +instruct Repl2I_reg(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "FMDRR $dst,$src,$src\t" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed int values in Double register pair +instruct Repl4I_reg(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(DEFAULT_COST*2); + size(8); + + format %{ "FMDRR $dst.lo,$src,$src\n\t" + "FMDRR $dst.hi,$src,$src" %} + + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register); + __ fmdrr($dst$$FloatRegister->successor()->successor(), + $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed int values in Double register +instruct Repl2I_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VDUP.32 $dst.D,$src\t" %} + ins_encode %{ + bool quad = false; + __ 
vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed int values in Double register pair +instruct Repl4I_reg_simd(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VDUP.32 $dst.Q,$src\t" %} + ins_encode %{ + bool quad = true; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +#ifndef AARCH64 +// Replicate scalar constant to packed int values in Double register +instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl1($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) ); + ins_pipe(loadConFD); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar constant to packed int values in Double register +instruct Repl2I_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VMOV.I32 $dst.D,$src" %} + ins_encode %{ + bool quad = false; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed int values in Double register pair +instruct Repl4I_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VMOV.I32 $dst.Q,$src" %} + ins_encode %{ + bool quad = true; + __ vmovI($dst$$FloatRegister, $src$$constant, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe(loadConFD); // FIXME +%} + +#ifdef AARCH64 +// Replicate 
scalar to packed long values in Double register pair +instruct Repl2L_reg(vecX dst, iRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "VDUP.2D $dst.Q,$src\t" %} + ins_encode %{ + bool quad = true; + __ vdupI($dst$$FloatRegister, $src$$Register, + MacroAssembler::VELEM_SIZE_64, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} +#else /* !AARCH64 */ +// Replicate scalar to packed long values in Double register pair +instruct Repl2L_reg(vecX dst, iRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + size(8); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMDRR $dst.D,$src.lo,$src.hi\t\n" + "FMDRR $dst.D.next,$src.lo,$src.hi" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + __ fmdrr($dst$$FloatRegister->successor()->successor(), + $src$$Register, $src$$Register->successor()); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +// Replicate scalar to packed float values in Double register +instruct Repl2F_regI(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + size(4); + + format %{ "FMDRR $dst.D,$src,$src\t" %} + ins_encode %{ + __ fmdrr($dst$$FloatRegister, $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed float values in Double register +instruct Repl2F_reg_vfp(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + expand %{ + iRegI tmp; + MoveF2I_reg_reg(tmp, src); + Repl2F_regI(dst,tmp); + %} +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed float values in Double register +instruct Repl2F_reg_simd(vecD dst, regF src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (ReplicateF src)); + size(4); + 
ins_cost(DEFAULT_COST); // FIXME + + format %{ "VDUP.32 $dst.D,$src.D\t" %} + ins_encode %{ + bool quad = false; + __ vdupF($dst$$FloatRegister, $src$$FloatRegister, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar to packed float values in Double register pair +instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + effect(TEMP tmp); + size(4*3); + ins_cost(DEFAULT_COST*3); // FIXME + + format %{ "FMRS $tmp,$src\n\t" + "FMDRR $dst.D,$tmp,$tmp\n\t" + "FMDRR $dst.D.next,$tmp,$tmp\t" %} + ins_encode %{ + __ fmrs($tmp$$Register, $src$$FloatRegister); + __ fmdrr($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + __ fmdrr($dst$$FloatRegister->successor()->successor(), + $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed float values in Double register pair +instruct Repl4F_reg_simd(vecX dst, regF src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (ReplicateF src)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + + format %{ "VDUP.32 $dst.Q,$src.D\t" %} + ins_encode %{ + bool quad = true; + __ vdupF($dst$$FloatRegister, $src$$FloatRegister, quad); + %} + ins_pipe(ialu_reg); // FIXME +%} + +#ifndef AARCH64 +// Replicate scalar constant to packed float values in Double register +instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl1($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmF(src, dst, tmp) ); + ins_pipe(loadConFD); // FIXME +%} +#endif /* !AARCH64 */ + +// Replicate scalar to packed double float values in Double register pair +instruct Repl2D_reg(vecX dst, regD src) %{ +#ifdef AARCH64 + 
predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (ReplicateD src)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "VDUP $dst.2D,$src\t" %} + ins_encode %{ + bool quad = true; + __ vdupD($dst$$FloatRegister, $src$$FloatRegister, quad); + %} +#else + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FCPYD $dst.D.a,$src\n\t" + "FCPYD $dst.D.b,$src\t" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + __ fcpyd(dsta, src); + FloatRegister dstb = dsta->successor()->successor(); + __ fcpyd(dstb, src); + %} +#endif + ins_pipe(ialu_reg); // FIXME +%} + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Bytes vector add +instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + format %{ "VADD.I8 $dst,$src1,$src2\t! add packed8B" %} + size(4); + ins_encode %{ + bool quad = false; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + size(4); + format %{ "VADD.I8 $dst.Q,$src1.Q,$src2.Q\t! add packed16B" %} + ins_encode %{ + bool quad = true; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector add +instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + size(4); + format %{ "VADD.I16 $dst,$src1,$src2\t! 
add packed4S" %} + ins_encode %{ + bool quad = false; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + size(4); + format %{ "VADD.I16 $dst.Q,$src1.Q,$src2.Q\t! add packed8S" %} + ins_encode %{ + bool quad = true; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector add +instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + size(4); + format %{ "VADD.I32 $dst.D,$src1.D,$src2.D\t! add packed2I" %} + ins_encode %{ + bool quad = false; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + size(4); + format %{ "VADD.I32 $dst.Q,$src1.Q,$src2.Q\t! add packed4I" %} + ins_encode %{ + bool quad = true; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector add +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + size(4); + format %{ "VADD.I64 $dst.Q,$src1.Q,$src2.Q\t! 
add packed2L" %} + ins_encode %{ + bool quad = true; + __ vaddI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_64, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector add +instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); + match(Set dst (AddVF src1 src2)); + size(4); + format %{ "VADD.F32 $dst,$src1,$src2\t! add packed2F" %} + ins_encode %{ + bool quad = false; + __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + ins_pipe( faddD_reg_reg ); // FIXME +%} + +#ifndef AARCH64 +instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); + match(Set dst (AddVF src1 src2)); + ins_cost(DEFAULT_COST*2); // FIXME + + size(4*2); + format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t" + "FADDS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ add_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ add_float($dst$$FloatRegister->successor(), + $src1$$FloatRegister->successor(), + $src2$$FloatRegister->successor()); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} +#endif + +instruct vadd4F_reg_simd(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant()); + match(Set dst (AddVF src1 src2)); + size(4); + format %{ "VADD.F32 $dst.Q,$src1.Q,$src2.Q\t! 
add packed4F" %} + ins_encode %{ + bool quad = true; + __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + ins_pipe( faddD_reg_reg ); // FIXME +%} + +#ifdef AARCH64 +instruct vadd2D_reg_simd(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); + match(Set dst (AddVD src1 src2)); + size(4); + format %{ "VADD.F64 $dst.Q,$src1.Q,$src2.Q\t! add packed2D" %} + ins_encode %{ + bool quad = true; + __ vaddF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F64, quad); + %} + ins_pipe( faddD_reg_reg ); // FIXME +%} +#else +instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); + match(Set dst (AddVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t" + "FADDS $dst.b,$src1.b,$src2.b\n\t" + "FADDS $dst.c,$src1.c,$src2.c\n\t" + "FADDS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ add_float(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(); + FloatRegister src1b = src1a->successor(); + FloatRegister src2b = src2a->successor(); + __ add_float(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(); + FloatRegister src1c = src1b->successor(); + FloatRegister src2c = src2b->successor(); + __ add_float(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(); + FloatRegister src1d = src1c->successor(); + FloatRegister src2d = src2c->successor(); + __ add_float(dstd, src1d, src2d); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + size(4*2); + 
ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FADDD $dst.a,$src1.a,$src2.a\n\t" + "FADDD $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ add_double(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor()->successor(); + FloatRegister src1b = src1a->successor()->successor(); + FloatRegister src2b = src2a->successor()->successor(); + __ add_double(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} +#endif + + +// Bytes vector sub +instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + size(4); + format %{ "VSUB.I8 $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + bool quad = false; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + size(4); + format %{ "VSUB.I8 $dst.Q,$src1.Q,$src2.Q\t! sub packed16B" %} + ins_encode %{ + bool quad = true; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector sub +instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + size(4); + format %{ "VSUB.I16 $dst,$src1,$src2\t! 
sub packed4S" %} + ins_encode %{ + bool quad = false; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub16S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + size(4); + format %{ "VSUB.I16 $dst.Q,$src1.Q,$src2.Q\t! sub packed8S" %} + ins_encode %{ + bool quad = true; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector sub +instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + size(4); + format %{ "VSUB.I32 $dst,$src1,$src2\t! sub packed2I" %} + ins_encode %{ + bool quad = false; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + size(4); + format %{ "VSUB.I32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4I" %} + ins_encode %{ + bool quad = true; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector sub +instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + size(4); + format %{ "VSUB.I64 $dst.Q,$src1.Q,$src2.Q\t! 
sub packed2L" %} + ins_encode %{ + bool quad = true; + __ vsubI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_64, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector sub +instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); + match(Set dst (SubVF src1 src2)); + size(4); + format %{ "VSUB.F32 $dst,$src1,$src2\t! sub packed2F" %} + ins_encode %{ + bool quad = false; + __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + ins_pipe( faddF_reg_reg ); // FIXME +%} + +#ifndef AARCH64 +instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); + match(Set dst (SubVF src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t" + "FSUBS $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ sub_float(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(); + FloatRegister src1b = src1a->successor(); + FloatRegister src2b = src2a->successor(); + __ sub_float(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} +#endif + + +instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant()); + match(Set dst (SubVF src1 src2)); + size(4); + format %{ "VSUB.F32 $dst.Q,$src1.Q,$src2.Q\t! 
sub packed4F" %} + ins_encode %{ + bool quad = true; + __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + ins_pipe( faddF_reg_reg ); // FIXME +%} + +#ifdef AARCH64 +instruct vsub2D_reg_simd(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); + match(Set dst (SubVD src1 src2)); + size(4); + format %{ "VSUB.F64 $dst.Q,$src1.Q,$src2.Q\t! sub packed2D" %} + ins_encode %{ + bool quad = true; + __ vsubF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F64, quad); + %} + ins_pipe( faddD_reg_reg ); // FIXME +%} +#else +instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); + match(Set dst (SubVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t" + "FSUBS $dst.b,$src1.b,$src2.b\n\t" + "FSUBS $dst.c,$src1.c,$src2.c\n\t" + "FSUBS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ sub_float(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(); + FloatRegister src1b = src1a->successor(); + FloatRegister src2b = src2a->successor(); + __ sub_float(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(); + FloatRegister src1c = src1b->successor(); + FloatRegister src2c = src2b->successor(); + __ sub_float(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(); + FloatRegister src1d = src1c->successor(); + FloatRegister src2d = src2c->successor(); + __ sub_float(dstd, src1d, src2d); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + size(4*2); + 
ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FSUBD $dst.a,$src1.a,$src2.a\n\t" + "FSUBD $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ sub_double(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor()->successor(); + FloatRegister src1b = src1a->successor()->successor(); + FloatRegister src2b = src2a->successor()->successor(); + __ sub_double(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} +#endif + +// Shorts/Chars vector mul +instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + size(4); + format %{ "VMUL.I16 $dst,$src1,$src2\t! mul packed4S" %} + ins_encode %{ + __ vmulI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, 0); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + size(4); + format %{ "VMUL.I16 $dst.Q,$src1.Q,$src2.Q\t! mul packed8S" %} + ins_encode %{ + __ vmulI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, 1); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector mul +instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + size(4); + format %{ "VMUL.I32 $dst,$src1,$src2\t! mul packed2I" %} + ins_encode %{ + __ vmulI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, 0); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + size(4); + format %{ "VMUL.I32 $dst.Q,$src1.Q,$src2.Q\t! 
mul packed4I" %} + ins_encode %{ + __ vmulI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, 1); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector mul +instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::simd_math_is_compliant()); + match(Set dst (MulVF src1 src2)); + size(4); + format %{ "VMUL.F32 $dst,$src1,$src2\t! mul packed2F" %} + ins_encode %{ + __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, 0); + %} + ins_pipe( fmulF_reg_reg ); // FIXME +%} + +#ifndef AARCH64 +instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2 && !VM_Version::simd_math_is_compliant()); + match(Set dst (MulVF src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t" + "FMULS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ mul_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ mul_float($dst$$FloatRegister->successor(), + $src1$$FloatRegister->successor(), + $src2$$FloatRegister->successor()); + %} + + ins_pipe(fmulF_reg_reg); // FIXME +%} +#endif + +instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::simd_math_is_compliant()); + match(Set dst (MulVF src1 src2)); + size(4); + format %{ "VMUL.F32 $dst.Q,$src1.Q,$src2.Q\t! 
mul packed4F" %} + ins_encode %{ + __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, 1); + %} + ins_pipe( fmulF_reg_reg ); // FIXME +%} + +#ifndef AARCH64 +instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::simd_math_is_compliant()); + match(Set dst (MulVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t" + "FMULS $dst.b,$src1.b,$src2.b\n\t" + "FMULS $dst.c,$src1.c,$src2.c\n\t" + "FMULS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ mul_float(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(); + FloatRegister src1b = src1a->successor(); + FloatRegister src2b = src2a->successor(); + __ mul_float(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(); + FloatRegister src1c = src1b->successor(); + FloatRegister src2c = src2b->successor(); + __ mul_float(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(); + FloatRegister src1d = src1c->successor(); + FloatRegister src2d = src2c->successor(); + __ mul_float(dstd, src1d, src2d); + %} + + ins_pipe(fmulF_reg_reg); // FIXME +%} +#endif + +#ifdef AARCH64 +instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (MulVD src1 src2)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "FMUL.2D $dst,$src1,$src2\t! 
double[2]" %} + ins_encode %{ + int quad = 1; + __ vmulF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F64, quad); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +%} +#else +instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMULD $dst.D.a,$src1.D.a,$src2.D.a\n\t" + "FMULD $dst.D.b,$src1.D.b,$src2.D.b" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ mul_double(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor()->successor(); + FloatRegister src1b = src1a->successor()->successor(); + FloatRegister src2b = src2a->successor()->successor(); + __ mul_double(dstb, src1b, src2b); + %} + + ins_pipe(fmulD_reg_reg); // FIXME +%} +#endif + + +// Floats vector div +instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); +#ifdef AARCH64 + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "FDIV.2S $dst,$src1,$src2\t! 
float[2]" %} + ins_encode %{ + int quad = 0; + __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +#else + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t" + "FDIVS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ div_float($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ div_float($dst$$FloatRegister->successor(), + $src1$$FloatRegister->successor(), + $src2$$FloatRegister->successor()); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +#endif +%} + +instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); +#ifdef AARCH64 + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "FDIV.4S $dst,$src1,$src2\t! float[4]" %} + ins_encode %{ + int quad = 1; + __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F32, quad); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +#else + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t" + "FDIVS $dst.b,$src1.b,$src2.b\n\t" + "FDIVS $dst.c,$src1.c,$src2.c\n\t" + "FDIVS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ div_float(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(); + FloatRegister src1b = src1a->successor(); + FloatRegister src2b = src2a->successor(); + __ div_float(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(); + FloatRegister src1c = src1b->successor(); + FloatRegister src2c = src2b->successor(); + __ div_float(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(); + FloatRegister src1d = src1c->successor(); + FloatRegister src2d = src2c->successor(); + __ div_float(dstd, src1d, src2d); + %} + + 
ins_pipe(fdivF_reg_reg); // FIXME +#endif +%} + +#ifdef AARCH64 +instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (DivVD src1 src2)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + + format %{ "FDIV.2D $dst,$src1,$src2\t! double[2]" %} + ins_encode %{ + int quad = 1; + __ vdivF($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + MacroAssembler::VFA_SIZE_F64, quad); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +%} +#else +instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FDIVD $dst.D.a,$src1.D.a,$src2.D.a\n\t" + "FDIVD $dst.D.b,$src1.D.b,$src2.D.b" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ div_double(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor()->successor(); + FloatRegister src1b = src1a->successor()->successor(); + FloatRegister src2b = src2a->successor()->successor(); + __ div_double(dstb, src1b, src2b); + %} + + ins_pipe(fdivD_reg_reg); // FIXME +%} +#endif + +// --------------------------------- NEG -------------------------------------- + +instruct vneg8B_reg(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + effect(DEF dst, USE src); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ "VNEG.S8 $dst.D,$src.D\t! neg packed8B" %} + ins_encode %{ + bool quad = false; + __ vnegI($dst$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vneg16B_reg(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + effect(DEF dst, USE src); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ "VNEG.S8 $dst.Q,$src.Q\t! 
neg0 packed16B" %} + ins_encode %{ + bool _float = false; + bool quad = true; + __ vnegI($dst$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------------------ Shift --------------------------------------- + +instruct vslcntD(vecD dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (LShiftCntV cnt)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + Repl8B_reg_simd(dst, cnt); + %} +%} + +instruct vslcntX(vecX dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (LShiftCntV cnt)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + Repl16B_reg(dst, cnt); + %} +%} + +// Low bits of vector "shift" elements are used, so it +// doesn't matter if we treat it as ints or bytes here. +instruct vsrcntD(vecD dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && VM_Version::has_simd()); + match(Set dst (RShiftCntV cnt)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "VDUP.8 $dst.D,$cnt\n\t" + "VNEG.S8 $dst.D,$dst.D\t! neg packed8B" %} + ins_encode %{ + bool quad = false; + __ vdupI($dst$$FloatRegister, $cnt$$Register, + MacroAssembler::VELEM_SIZE_8, quad); + __ vnegI($dst$$FloatRegister, $dst$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrcntX(vecX dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && VM_Version::has_simd()); + match(Set dst (RShiftCntV cnt)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + format %{ "VDUP.8 $dst.Q,$cnt\n\t" + "VNEG.S8 $dst.Q,$dst.Q\t! 
neg packed16B" %} + ins_encode %{ + bool quad = true; + __ vdupI($dst$$FloatRegister, $cnt$$Register, + MacroAssembler::VELEM_SIZE_8, quad); + __ vnegI($dst$$FloatRegister, $dst$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Byte vector logical left/right shift based on sign +instruct vsh8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U8 $dst.D,$src.D,$shift.D\t! logical left/right shift packed8B" + %} + ins_encode %{ + bool quad = false; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U8 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Char vector logical left/right shift based on sign +instruct vsh4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U16 $dst.D,$src.D,$shift.D\t! 
logical left/right shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U16 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical left/right shift based on sign +instruct vsh2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U32 $dst.D,$src.D,$shift.D\t! logical left/right shift packed2I" + %} + ins_encode %{ + bool quad = false; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U32 $dst.Q,$src.Q,$shift.Q\t! 
logical left/right shift packed4I" + %} + ins_encode %{ + bool quad = true; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical left/right shift based on sign +instruct vsh2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U64 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed2L" + %} + ins_encode %{ + bool quad = true; + __ vshlUI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_64, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------------------ LeftShift ----------------------------------- + +// Byte vector left shift +instruct vsl8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh8B_reg(dst, src, shift); + %} +%} + +instruct vsl16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh16B_reg(dst, src, shift); + %} +%} + +instruct vsl8B_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I8 $dst.D,$src.D,$shift\t! 
logical left shift packed8B" + %} + ins_encode %{ + bool quad = false; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl16B_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I8 $dst.Q,$src.Q,$shift\t! logical left shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector logical left/right shift +instruct vsl4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (URShiftVS src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh4S_reg(dst, src, shift); + %} +%} + +instruct vsl8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + match(Set dst (URShiftVS src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh8S_reg(dst, src, shift); + %} +%} + +instruct vsl4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I16 $dst.D,$src.D,$shift\t! logical left shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I16 $dst.Q,$src.Q,$shift\t! 
logical left shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical left/right shift +instruct vsl2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (LShiftVI src shift)); + match(Set dst (URShiftVI src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh2I_reg(dst, src, shift); + %} +%} + +instruct vsl4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd()); + match(Set dst (LShiftVI src shift)); + match(Set dst (URShiftVI src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh4I_reg(dst, src, shift); + %} +%} + +instruct vsl2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (LShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I32 $dst.D,$src.D,$shift\t! logical left shift packed2I" + %} + ins_encode %{ + bool quad = false; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd()); + match(Set dst (LShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I32 $dst.Q,$src.Q,$shift\t! 
logical left shift packed4I" + %} + ins_encode %{ + bool quad = true; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical left/right shift +instruct vsl2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + match(Set dst (URShiftVL src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh2L_reg(dst, src, shift); + %} +%} + +instruct vsl2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I64 $dst.Q,$src.Q,$shift\t! logical left shift packed2L" + %} + ins_encode %{ + bool quad = true; + __ vshli($dst$$FloatRegister, $src$$FloatRegister, 64, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ----------------------- LogicalRightShift ----------------------------------- + +// Bytes/Shorts vector logical right shift produces incorrect Java result +// for negative data because java code convert short value into int with +// sign extension before a shift. + +// Chars vector logical right shift +instruct vsrl4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U16 $dst.D,$src.D,$shift\t! logical right shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshrUI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U16 $dst.Q,$src.Q,$shift\t! 
logical right shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshrUI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical right shift +instruct vsrl2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::has_simd()); + match(Set dst (URShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U32 $dst.D,$src.D,$shift\t! logical right shift packed2I" + %} + ins_encode %{ + bool quad = false; + __ vshrUI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::has_simd()); + match(Set dst (URShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I" + %} + ins_encode %{ + bool quad = true; + __ vshrUI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical right shift +instruct vsrl2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U64 $dst.Q,$src.Q,$shift\t! 
logical right shift packed2L" + %} + ins_encode %{ + bool quad = true; + __ vshrUI($dst$$FloatRegister, $src$$FloatRegister, 64, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------- ArithmeticRightShift ----------------------------------- + +// Bytes vector arithmetic left/right shift based on sign +instruct vsha8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S8 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed8B" + %} + ins_encode %{ + bool quad = false; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S8 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_8, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts vector arithmetic left/right shift based on sign +instruct vsha4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S16 $dst.D,$src.D,$shift.D\t! 
arithmetic right shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S16 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_16, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector arithmetic left/right shift based on sign +instruct vsha2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S32 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed2I" + %} + ins_encode %{ + bool quad = false; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S32 $dst.Q,$src.Q,$shift.Q\t! 
arithmetic right shift packed4I" + %} + ins_encode %{ + bool quad = true; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_32, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector arithmetic left/right shift based on sign +instruct vsha2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S64 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed2L" + %} + ins_encode %{ + bool quad = true; + __ vshlSI($dst$$FloatRegister, $shift$$FloatRegister, $src$$FloatRegister, + MacroAssembler::VELEM_SIZE_64, quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Byte vector arithmetic right shift + +instruct vsra8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha8B_reg(dst, src, shift); + %} +%} + +instruct vsrl16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha16B_reg(dst, src, shift); + %} +%} + +instruct vsrl8B_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S8 $dst.D,$src.D,$shift\t! arithmetic right shift packed8B" + %} + ins_encode %{ + bool quad = false; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl16B_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S8 $dst.Q,$src.Q,$shift\t! 
arithmetic right shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 8, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts vector arithmetic right shift +instruct vsra4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha4S_reg(dst, src, shift); + %} +%} + +instruct vsra8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha8S_reg(dst, src, shift); + %} +%} + +instruct vsra4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S16 $dst.D,$src.D,$shift\t! arithmetic right shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsra8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S16 $dst.Q,$src.Q,$shift\t! 
arithmetic right shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 16, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector arithmetic right shift +instruct vsra2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha2I_reg(dst, src, shift); + %} +%} + +instruct vsra4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha4I_reg(dst, src, shift); + %} +%} + +instruct vsra2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S32 $dst.D,$src.D,$shift\t! arithmetic right shift packed2I" + %} + ins_encode %{ + bool quad = false; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsra4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S32 $dst.Q,$src.Q,$shift\t! 
arithmetic right shift packed4I" + %} + ins_encode %{ + bool quad = true; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 32, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector arithmetic right shift +instruct vsra2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha2L_reg(dst, src, shift); + %} +%} + +instruct vsra2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S64 $dst.Q,$src.Q,$shift\t! arithmetic right shift packed2L" + %} + ins_encode %{ + bool quad = true; + __ vshrSI($dst$$FloatRegister, $src$$FloatRegister, 64, $shift$$constant, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- AND -------------------------------------- + +instruct vandD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + format %{ "VAND $dst.D,$src1.D,$src2.D\t! and vectors (8 bytes)" %} + ins_encode %{ + bool quad = false; + __ vandI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vandX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "VAND $dst.Q,$src1.Q,$src2.Q\t! 
and vectors (16 bytes)" %} + ins_encode %{ + bool quad = true; + __ vandI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- OR --------------------------------------- + +instruct vorD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + format %{ "VOR $dst.D,$src1.D,$src2.D\t! or vectors (8 bytes)" %} + ins_encode %{ + bool quad = false; + __ vorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vorX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "VOR $dst.Q,$src1.Q,$src2.Q\t! or vectors (16 bytes)" %} + ins_encode %{ + bool quad = true; + __ vorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- XOR -------------------------------------- + +instruct vxorD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + format %{ "VXOR $dst.D,$src1.D,$src2.D\t! xor vectors (8 bytes)" %} + ins_encode %{ + bool quad = false; + __ vxorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vxorX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "VXOR $dst.Q,$src1.Q,$src2.Q\t! 
xor vectors (16 bytes)" %} + ins_encode %{ + bool quad = true; + __ vxorI($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + quad); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. 
+// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) ); +// %} +// + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +// peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +// %} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instruction definitions. +// +// ARM will probably not have any of these rules due to RISC instruction set. + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architecture's pipeline. 
--- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/arm_32.ad 2016-12-02 11:16:58.144993691 -0500 @@ -0,0 +1,586 @@ +// +// Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +// ARM Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture. +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding, vm name ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. 
+// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + + +// ---------------------------- +// Integer/Long Registers +// ---------------------------- +// Special roles (per the register classes defined later in this block): +// R10 = TLS/thread, R11 = FP, R12 = IP, R13 = SP, R14 = LR, R15 = PC. + +reg_def R_R0 (SOC, SOC, Op_RegI, 0, R(0)->as_VMReg()); +reg_def R_R1 (SOC, SOC, Op_RegI, 1, R(1)->as_VMReg()); +reg_def R_R2 (SOC, SOC, Op_RegI, 2, R(2)->as_VMReg()); +reg_def R_R3 (SOC, SOC, Op_RegI, 3, R(3)->as_VMReg()); +reg_def R_R4 (SOC, SOE, Op_RegI, 4, R(4)->as_VMReg()); +reg_def R_R5 (SOC, SOE, Op_RegI, 5, R(5)->as_VMReg()); +reg_def R_R6 (SOC, SOE, Op_RegI, 6, R(6)->as_VMReg()); +reg_def R_R7 (SOC, SOE, Op_RegI, 7, R(7)->as_VMReg()); +reg_def R_R8 (SOC, SOE, Op_RegI, 8, R(8)->as_VMReg()); +reg_def R_R9 (SOC, SOE, Op_RegI, 9, R(9)->as_VMReg()); +reg_def R_R10(NS, SOE, Op_RegI, 10, R(10)->as_VMReg()); +reg_def R_R11(NS, SOE, Op_RegI, 11, R(11)->as_VMReg()); +reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg()); +reg_def R_R13(NS, NS, Op_RegI, 13, R(13)->as_VMReg()); +reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg()); +reg_def R_R15(NS, NS, Op_RegI, 15, R(15)->as_VMReg()); + +// ---------------------------- +// Float/Double Registers +// ---------------------------- + +// Float Registers + +reg_def
R_S0 ( SOC, SOC, Op_RegF, 0, S0->as_VMReg()); +reg_def R_S1 ( SOC, SOC, Op_RegF, 1, S1_reg->as_VMReg()); +reg_def R_S2 ( SOC, SOC, Op_RegF, 2, S2_reg->as_VMReg()); +reg_def R_S3 ( SOC, SOC, Op_RegF, 3, S3_reg->as_VMReg()); +reg_def R_S4 ( SOC, SOC, Op_RegF, 4, S4_reg->as_VMReg()); +reg_def R_S5 ( SOC, SOC, Op_RegF, 5, S5_reg->as_VMReg()); +reg_def R_S6 ( SOC, SOC, Op_RegF, 6, S6_reg->as_VMReg()); +reg_def R_S7 ( SOC, SOC, Op_RegF, 7, S7->as_VMReg()); +reg_def R_S8 ( SOC, SOC, Op_RegF, 8, S8->as_VMReg()); +reg_def R_S9 ( SOC, SOC, Op_RegF, 9, S9->as_VMReg()); +reg_def R_S10( SOC, SOC, Op_RegF, 10,S10->as_VMReg()); +reg_def R_S11( SOC, SOC, Op_RegF, 11,S11->as_VMReg()); +reg_def R_S12( SOC, SOC, Op_RegF, 12,S12->as_VMReg()); +reg_def R_S13( SOC, SOC, Op_RegF, 13,S13->as_VMReg()); +reg_def R_S14( SOC, SOC, Op_RegF, 14,S14->as_VMReg()); +reg_def R_S15( SOC, SOC, Op_RegF, 15,S15->as_VMReg()); +reg_def R_S16( SOC, SOE, Op_RegF, 16,S16->as_VMReg()); +reg_def R_S17( SOC, SOE, Op_RegF, 17,S17->as_VMReg()); +reg_def R_S18( SOC, SOE, Op_RegF, 18,S18->as_VMReg()); +reg_def R_S19( SOC, SOE, Op_RegF, 19,S19->as_VMReg()); +reg_def R_S20( SOC, SOE, Op_RegF, 20,S20->as_VMReg()); +reg_def R_S21( SOC, SOE, Op_RegF, 21,S21->as_VMReg()); +reg_def R_S22( SOC, SOE, Op_RegF, 22,S22->as_VMReg()); +reg_def R_S23( SOC, SOE, Op_RegF, 23,S23->as_VMReg()); +reg_def R_S24( SOC, SOE, Op_RegF, 24,S24->as_VMReg()); +reg_def R_S25( SOC, SOE, Op_RegF, 25,S25->as_VMReg()); +reg_def R_S26( SOC, SOE, Op_RegF, 26,S26->as_VMReg()); +reg_def R_S27( SOC, SOE, Op_RegF, 27,S27->as_VMReg()); +reg_def R_S28( SOC, SOE, Op_RegF, 28,S28->as_VMReg()); +reg_def R_S29( SOC, SOE, Op_RegF, 29,S29->as_VMReg()); +reg_def R_S30( SOC, SOE, Op_RegF, 30,S30->as_VMReg()); +reg_def R_S31( SOC, SOE, Op_RegF, 31,S31->as_VMReg()); + +// Double Registers +// The rules of ADL require that double registers be defined in pairs. +// Each pair must be two 32-bit values, but not necessarily a pair of +// single float registers.
In each pair, ADLC-assigned register numbers +// must be adjacent, with the lower number even. Finally, when the +// CPU stores such a register pair to memory, the word associated with +// the lower ADLC-assigned number must be stored to the lower address. +// +// Each R_Dn below uses encoding 2*n; its R_Dnx companion is the next VMReg +// slot (as_VMReg()->next()) and carries 255 in the encoding field. +// NOTE(review): 255 looks like a placeholder for the unencoded upper half -- confirm. + +reg_def R_D16 (SOC, SOC, Op_RegD, 32, D16->as_VMReg()); +reg_def R_D16x(SOC, SOC, Op_RegD,255, D16->as_VMReg()->next()); +reg_def R_D17 (SOC, SOC, Op_RegD, 34, D17->as_VMReg()); +reg_def R_D17x(SOC, SOC, Op_RegD,255, D17->as_VMReg()->next()); +reg_def R_D18 (SOC, SOC, Op_RegD, 36, D18->as_VMReg()); +reg_def R_D18x(SOC, SOC, Op_RegD,255, D18->as_VMReg()->next()); +reg_def R_D19 (SOC, SOC, Op_RegD, 38, D19->as_VMReg()); +reg_def R_D19x(SOC, SOC, Op_RegD,255, D19->as_VMReg()->next()); +reg_def R_D20 (SOC, SOC, Op_RegD, 40, D20->as_VMReg()); +reg_def R_D20x(SOC, SOC, Op_RegD,255, D20->as_VMReg()->next()); +reg_def R_D21 (SOC, SOC, Op_RegD, 42, D21->as_VMReg()); +reg_def R_D21x(SOC, SOC, Op_RegD,255, D21->as_VMReg()->next()); +reg_def R_D22 (SOC, SOC, Op_RegD, 44, D22->as_VMReg()); +reg_def R_D22x(SOC, SOC, Op_RegD,255, D22->as_VMReg()->next()); +reg_def R_D23 (SOC, SOC, Op_RegD, 46, D23->as_VMReg()); +reg_def R_D23x(SOC, SOC, Op_RegD,255, D23->as_VMReg()->next()); +reg_def R_D24 (SOC, SOC, Op_RegD, 48, D24->as_VMReg()); +reg_def R_D24x(SOC, SOC, Op_RegD,255, D24->as_VMReg()->next()); +reg_def R_D25 (SOC, SOC, Op_RegD, 50, D25->as_VMReg()); +reg_def R_D25x(SOC, SOC, Op_RegD,255, D25->as_VMReg()->next()); +reg_def R_D26 (SOC, SOC, Op_RegD, 52, D26->as_VMReg()); +reg_def R_D26x(SOC, SOC, Op_RegD,255, D26->as_VMReg()->next()); +reg_def R_D27 (SOC, SOC, Op_RegD, 54, D27->as_VMReg()); +reg_def R_D27x(SOC, SOC, Op_RegD,255, D27->as_VMReg()->next()); +reg_def R_D28 (SOC, SOC, Op_RegD, 56, D28->as_VMReg()); +reg_def R_D28x(SOC, SOC, Op_RegD,255, D28->as_VMReg()->next()); +reg_def R_D29 (SOC, SOC, Op_RegD, 58, D29->as_VMReg()); +reg_def R_D29x(SOC, SOC, Op_RegD,255, D29->as_VMReg()->next()); +reg_def R_D30 (SOC, SOC,
Op_RegD, 60, D30->as_VMReg()); +reg_def R_D30x(SOC, SOC, Op_RegD,255, D30->as_VMReg()->next()); +reg_def R_D31 (SOC, SOC, Op_RegD, 62, D31->as_VMReg()); +reg_def R_D31x(SOC, SOC, Op_RegD,255, D31->as_VMReg()->next()); + +// ---------------------------- +// Special Registers +// Condition Codes Flag Registers +// (neither is associated with a VMReg: both use VMRegImpl::Bad()) +reg_def APSR (SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad()); +reg_def FPSCR(SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad()); + +// ---------------------------- +// Specify the enum values for the registers. These enums are only used by the +// OptoReg "class". We can convert these enum values at will to VMReg when needed +// for visibility to the rest of the vm. The order of this enum influences the +// register allocator so having the freedom to set this order and not be stuck +// with the order that is natural for the rest of the vm is worth it. + +// Registers are listed in this order so that R11/R12 is an aligned pair that can be used for longs +alloc_class chunk0( + R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R10, R_R13, R_R14, R_R15, R_R0, R_R1, R_R2, R_R3); + +// Note that a register is not allocatable unless it is also mentioned +// in a widely-used reg_class below. + +// chunk1 lists S16-S31 ahead of S0-S15, followed by the D16-D31 halves. +alloc_class chunk1( + R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, + R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31, + R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, + R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15, + R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x, + R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x, + R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x, + R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x +); + +alloc_class chunk2(APSR, FPSCR); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description.
+// 1) reg_class inline_cache_reg ( as defined in frame section ) +// 2) reg_class interpreter_method_oop_reg ( as defined in frame section ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// ---------------------------- +// Integer Register Classes +// ---------------------------- +// Exclusions from int_reg: +// SP (R13), PC (R15) +// R10: reserved by HotSpot as the TLS register (invariant within Java) +reg_class int_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14); + +// Single-register integer classes +reg_class R0_regI(R_R0); +reg_class R1_regI(R_R1); +reg_class R2_regI(R_R2); +reg_class R3_regI(R_R3); +reg_class R12_regI(R_R12); + +// ---------------------------- +// Pointer Register Classes +// ---------------------------- +// ptr_reg lists the same registers as int_reg. +reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14); +// Special class for storeP instructions, which can store SP or RPC to TLS. +// It is also used for memory addressing, allowing direct TLS addressing.
+reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14, R_R10 /* TLS*/, R_R13 /* SP*/); + +// Symbolic aliases for the registers named by the special pointer classes below +#define R_Ricklass R_R8 +#define R_Rmethod R_R9 +#define R_Rthread R_R10 +#define R_Rexception_obj R_R4 + +// Other special pointer regs +reg_class R0_regP(R_R0); +reg_class R1_regP(R_R1); +reg_class R2_regP(R_R2); +reg_class R4_regP(R_R4); +reg_class Rexception_regP(R_Rexception_obj); +reg_class Ricklass_regP(R_Ricklass); +reg_class Rmethod_regP(R_Rmethod); +reg_class Rthread_regP(R_Rthread); +reg_class IP_regP(R_R12); +reg_class LR_regP(R_R14); + +reg_class FP_regP(R_R11); + +// ---------------------------- +// Long Register Classes +// ---------------------------- +// Register pairs usable for longs; includes the R11/R12 pair +reg_class long_reg ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9, R_R11,R_R12); +// for ldrexd, strexd: first reg of pair must be even (so the R11/R12 pair is left out) +reg_class long_reg_align ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9); + +reg_class R0R1_regL(R_R0,R_R1); +reg_class R2R3_regL(R_R2,R_R3); + +// ---------------------------- +// Special Class for Condition Code Flags Register +reg_class int_flags(APSR); +reg_class float_flags(FPSCR); + + +// ---------------------------- +// Floating Point Register Classes +// ---------------------------- +// Skip S14/S15, they are reserved for mem-mem copies +reg_class sflt_reg(R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, + R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31); + +// Paired floating point registers--they show up in the same order as the floats, +// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
+reg_class dflt_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13, + R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31, + R_D16,R_D16x, R_D17,R_D17x, R_D18,R_D18x, R_D19,R_D19x, R_D20,R_D20x, R_D21,R_D21x, R_D22,R_D22x, + R_D23,R_D23x, R_D24,R_D24x, R_D25,R_D25x, R_D26,R_D26x, R_D27,R_D27x, R_D28,R_D28x, R_D29,R_D29x, + R_D30,R_D30x, R_D31,R_D31x); + +// dflt_low_reg: only the S-register pairs of dflt_reg (no R_D16..R_D31) +reg_class dflt_low_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13, + R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31); + + +// Runtime-selected class: the full dflt_reg mask when VM_Version::has_vfp3_32() +// is true, otherwise only the dflt_low_reg mask. +reg_class actual_dflt_reg %{ + if (VM_Version::has_vfp3_32()) { + return DFLT_REG_mask(); + } else { + return DFLT_LOW_REG_mask(); + } +%} + +// Single-register and single-pair classes +reg_class S0_regF(R_S0); +reg_class D0_regD(R_S0,R_S1); +reg_class D1_regD(R_S2,R_S3); +reg_class D2_regD(R_S4,R_S5); +reg_class D3_regD(R_S6,R_S7); +reg_class D4_regD(R_S8,R_S9); +reg_class D5_regD(R_S10,R_S11); +reg_class D6_regD(R_S12,R_S13); +reg_class D7_regD(R_S14,R_S15); + +reg_class D16_regD(R_D16,R_D16x); +reg_class D17_regD(R_D17,R_D17x); +reg_class D18_regD(R_D18,R_D18x); +reg_class D19_regD(R_D19,R_D19x); +reg_class D20_regD(R_D20,R_D20x); +reg_class D21_regD(R_D21,R_D21x); +reg_class D22_regD(R_D22,R_D22x); +reg_class D23_regD(R_D23,R_D23x); +reg_class D24_regD(R_D24,R_D24x); +reg_class D25_regD(R_D25,R_D25x); +reg_class D26_regD(R_D26,R_D26x); +reg_class D27_regD(R_D27,R_D27x); +reg_class D28_regD(R_D28,R_D28x); +reg_class D29_regD(R_D29,R_D29x); +reg_class D30_regD(R_D30,R_D30x); +reg_class D31_regD(R_D31,R_D31x); + +reg_class vectorx_reg(R_S0,R_S1,R_S2,R_S3, R_S4,R_S5,R_S6,R_S7, + R_S8,R_S9,R_S10,R_S11, /* S12-S15 are not listed; S14/S15 are reserved */ + R_S16,R_S17,R_S18,R_S19, R_S20,R_S21,R_S22,R_S23, + R_S24,R_S25,R_S26,R_S27, R_S28,R_S29,R_S30,R_S31, + R_D16,R_D16x,R_D17,R_D17x, R_D18,R_D18x,R_D19,R_D19x, + R_D20,R_D20x,R_D21,R_D21x, R_D22,R_D22x,R_D23,R_D23x, + R_D24,R_D24x,R_D25,R_D25x,
R_D26,R_D26x,R_D27,R_D27x, + R_D28,R_D28x,R_D29,R_D29x, R_D30,R_D30x,R_D31,R_D31x); + +%} + +source_hpp %{ +// FIXME +// S14/S15 are the registers set aside for mem-mem copies; S0/S1 are named as the +// hard-float return registers used by c2::return_value below. +const MachRegisterNumbers R_mem_copy_lo_num = R_S14_num; +const MachRegisterNumbers R_mem_copy_hi_num = R_S15_num; +const FloatRegister Rmemcopy = S14; +const MachRegisterNumbers R_hf_ret_lo_num = R_S0_num; +const MachRegisterNumbers R_hf_ret_hi_num = R_S1_num; + +const MachRegisterNumbers R_Ricklass_num = R_R8_num; +const MachRegisterNumbers R_Rmethod_num = R_R9_num; + +// Instruction mnemonics as string literals +#define LDR_DOUBLE "FLDD" +#define LDR_FLOAT "FLDS" +#define STR_DOUBLE "FSTD" +#define STR_FLOAT "FSTS" +#define LDR_64 "LDRD" +#define STR_64 "STRD" +#define LDR_32 "LDR" +#define STR_32 "STR" +#define MOV_DOUBLE "FCPYD" +#define MOV_FLOAT "FCPYS" +#define FMSR "FMSR" +#define FMRS "FMRS" +#define LDREX "ldrex " +#define STREX "strex " + +// Identifier aliases that expand to the plain mnemonic tokens +#define str_64 strd +#define ldr_64 ldrd +#define ldr_32 ldr +#define ldrex ldrex +#define strex strex + +// Offset range checks: each returns true when offset lies strictly between +// -limit and +limit (1024 for D/fp, 4096 for I/P, 256 for HD). +static inline bool is_memoryD(int offset) { + return offset < 1024 && offset > -1024; +} + +static inline bool is_memoryfp(int offset) { + return offset < 1024 && offset > -1024; +} + +static inline bool is_memoryI(int offset) { + return offset < 4096 && offset > -4096; +} + +static inline bool is_memoryP(int offset) { + return offset < 4096 && offset > -4096; +} + +static inline bool is_memoryHD(int offset) { + return offset < 256 && offset > -256; +} + +// is_aimm and is_limmI both forward to AsmOperand::is_rotated_imm. +static inline bool is_aimm(int imm) { + return AsmOperand::is_rotated_imm(imm); +} + +static inline bool is_limmI(jint imm) { + return AsmOperand::is_rotated_imm(imm); +} + +// True if either the low n bits of imm, or imm itself, passes is_limmI. +static inline bool is_limmI_low(jint imm, int n) { + int imml = imm & right_n_bits(n); + return is_limmI(imml) || is_limmI(imm); +} + +// Returns the low n bits of imm when they pass is_limmI, otherwise imm unchanged. +static inline int limmI_low(jint imm, int n) { + int imml = imm & right_n_bits(n); + return is_limmI(imml) ?
imml : imm; +} + +%} + +source %{ + +// Given a register encoding, produce an Integer Register object +static Register reg_to_register_object(int register_encoding) { + assert(R0->encoding() == R_R0_enc && R15->encoding() == R_R15_enc, "right coding"); + return as_Register(register_encoding); +} + +// Given a register encoding, produce a single-precision Float Register object +static FloatRegister reg_to_FloatRegister_object(int register_encoding) { + assert(S0->encoding() == R_S0_enc && S31->encoding() == R_S31_enc, "right coding"); + return as_FloatRegister(register_encoding); +} + +void Compile::pd_compiler2_init() { + // Unimplemented +} + +// Location of compiled Java return values. Same as C +// The lo/hi tables are indexed by ideal_reg. Without __ABI_HARD__ every entry is +// R0/R1 (or Bad); with __ABI_HARD__ two entries use R_hf_ret_lo_num/R_hf_ret_hi_num +// instead (presumably the Op_RegF/Op_RegD slots -- confirm against the Op_Reg* numbering). +OptoRegPair c2::return_value(int ideal_reg) { + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); +#ifndef __ABI_HARD__ + static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_R0_num, R_R0_num, R_R0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_R1_num, R_R1_num }; +#else + static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_hf_ret_lo_num, R_hf_ret_lo_num, R_R0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_hf_ret_hi_num, R_R1_num }; +#endif + return OptoRegPair( hi[ideal_reg], lo[ideal_reg]); +} + +// !!!!! Special hack to get all types of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +// Each result is an instruction count times NativeInstruction::instruction_size; +// a far call counts 3 instructions and a near call 1. + +int MachCallStaticJavaNode::ret_addr_offset() { + bool far = (_method == NULL) ? maybe_far_call(this) : !cache_reachable(); + // one extra instruction is counted when _method_handle_invoke is set + return ((far ? 3 : 1) + (_method_handle_invoke ? 1 : 0)) * + NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + bool far = !cache_reachable(); + // mov_oop is always 2 words + return (2 + (far ?
3 : 1)) * NativeInstruction::instruction_size; +} + +int MachCallRuntimeNode::ret_addr_offset() { + // bl or movw; movt; blx + bool far = maybe_far_call(this); + return (far ? 3 : 1) * NativeInstruction::instruction_size; +} +%} + +// The intptr_t operand types, defined by textual substitution. +// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.) +#define immX immI +#define immXRot immIRot +#define iRegX iRegI +#define aimmX aimmI +#define limmX limmI +#define immX10x2 immI10x2 +#define LShiftX LShiftI +#define shimmX immU5 + +// Compatibility interface +#define aimmP immPRot +#define immIMov immIRot + +#define store_RegL iRegL +#define store_RegLd iRegLd +#define store_RegI iRegI +#define store_ptr_RegP iRegP + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions.
+ +//----------Simple Operands---------------------------------------------------- +// Immediate Operands + +// Integer constant accepted by AsmOperand::is_rotated_imm() +operand immIRot() %{ + predicate(AsmOperand::is_rotated_imm(n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Non-zero integer constant whose bitwise complement is accepted by +// AsmOperand::is_rotated_imm() +operand immIRotn() %{ + predicate(n->get_int() != 0 && AsmOperand::is_rotated_imm(~n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Integer constant that is not itself a rotated immediate but whose +// arithmetic negation is +operand immIRotneg() %{ + // if AsmOperand::is_rotated_imm() is true for this constant, it is + // an immIRot and an optimal instruction combination exists to handle the + // constant as an immIRot + predicate(!AsmOperand::is_rotated_imm(n->get_int()) && AsmOperand::is_rotated_imm(-n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Non-negative integer immediate that is encodable using the rotation scheme, +// and that when expanded fits in 31 bits.
+operand immU31Rot() %{ + predicate((0 <= n->get_int()) && AsmOperand::is_rotated_imm(n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer constant that is either null, or is accepted by +// AsmOperand::is_rotated_imm() and carries no relocation (relocInfo::none) +operand immPRot() %{ + predicate(n->get_ptr() == 0 || (AsmOperand::is_rotated_imm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none)); + + match(ConP); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Long constant whose value shifted right by 32 is zero and whose low word is a +// rotated immediate +operand immLlowRot() %{ + predicate(n->get_long() >> 32 == 0 && AsmOperand::is_rotated_imm((int)n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long constant whose high word and low word are each a rotated immediate +operand immLRot2() %{ + predicate(AsmOperand::is_rotated_imm((int)(n->get_long() >> 32)) && + AsmOperand::is_rotated_imm((int)(n->get_long()))); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 12-bit - for addressing mode +// (accepted range is -4095..4095) +operand immI12() %{ + predicate((-4096 < n->get_int()) && (n->get_int() < 4096)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 10-bit disp and disp+4 - for addressing float pair +// (upper bound is reduced by 4 so that disp+4 also stays below 1024) +operand immI10x2() %{ + predicate((-1024 < n->get_int()) && (n->get_int() < 1024 - 4)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 12-bit disp and disp+4 - for addressing word pair +// (upper bound is reduced by 4 so that disp+4 also stays below 4096) +operand immI12x2() %{ + predicate((-4096 < n->get_int()) && (n->get_int() < 4096 - 4)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/arm_64.ad 2016-12-02 11:17:03.689308094 -0500 @@ -0,0 +1,998 @@ +// +// Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +// ARM Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture. +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding, vm name ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites.
+// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// FIXME: above comment seems wrong. Spill done through MachSpillCopyNode +// +// The encoding number is the actual bit-pattern placed into the opcodes. + + +// ---------------------------- +// Integer/Long Registers +// ---------------------------- + +// TODO: would be nice to keep track of high-word state: +// zeroRegI --> RegL +// signedRegI --> RegL +// junkRegI --> RegL +// how to tell C2 to treat RegI as RegL, or RegL as RegI?
+reg_def R_R0 (SOC, SOC, Op_RegI, 0, R0->as_VMReg()); +reg_def R_R0x (SOC, SOC, Op_RegI, 255, R0->as_VMReg()->next()); +reg_def R_R1 (SOC, SOC, Op_RegI, 1, R1->as_VMReg()); +reg_def R_R1x (SOC, SOC, Op_RegI, 255, R1->as_VMReg()->next()); +reg_def R_R2 (SOC, SOC, Op_RegI, 2, R2->as_VMReg()); +reg_def R_R2x (SOC, SOC, Op_RegI, 255, R2->as_VMReg()->next()); +reg_def R_R3 (SOC, SOC, Op_RegI, 3, R3->as_VMReg()); +reg_def R_R3x (SOC, SOC, Op_RegI, 255, R3->as_VMReg()->next()); +reg_def R_R4 (SOC, SOC, Op_RegI, 4, R4->as_VMReg()); +reg_def R_R4x (SOC, SOC, Op_RegI, 255, R4->as_VMReg()->next()); +reg_def R_R5 (SOC, SOC, Op_RegI, 5, R5->as_VMReg()); +reg_def R_R5x (SOC, SOC, Op_RegI, 255, R5->as_VMReg()->next()); +reg_def R_R6 (SOC, SOC, Op_RegI, 6, R6->as_VMReg()); +reg_def R_R6x (SOC, SOC, Op_RegI, 255, R6->as_VMReg()->next()); +reg_def R_R7 (SOC, SOC, Op_RegI, 7, R7->as_VMReg()); +reg_def R_R7x (SOC, SOC, Op_RegI, 255, R7->as_VMReg()->next()); + +reg_def R_R8 (SOC, SOC, Op_RegI, 8, R8->as_VMReg()); +reg_def R_R8x (SOC, SOC, Op_RegI, 255, R8->as_VMReg()->next()); +reg_def R_R9 (SOC, SOC, Op_RegI, 9, R9->as_VMReg()); +reg_def R_R9x (SOC, SOC, Op_RegI, 255, R9->as_VMReg()->next()); +reg_def R_R10 (SOC, SOC, Op_RegI, 10, R10->as_VMReg()); +reg_def R_R10x(SOC, SOC, Op_RegI, 255, R10->as_VMReg()->next()); +reg_def R_R11 (SOC, SOC, Op_RegI, 11, R11->as_VMReg()); +reg_def R_R11x(SOC, SOC, Op_RegI, 255, R11->as_VMReg()->next()); +reg_def R_R12 (SOC, SOC, Op_RegI, 12, R12->as_VMReg()); +reg_def R_R12x(SOC, SOC, Op_RegI, 255, R12->as_VMReg()->next()); +reg_def R_R13 (SOC, SOC, Op_RegI, 13, R13->as_VMReg()); +reg_def R_R13x(SOC, SOC, Op_RegI, 255, R13->as_VMReg()->next()); +reg_def R_R14 (SOC, SOC, Op_RegI, 14, R14->as_VMReg()); +reg_def R_R14x(SOC, SOC, Op_RegI, 255, R14->as_VMReg()->next()); +reg_def R_R15 (SOC, SOC, Op_RegI, 15, R15->as_VMReg()); +reg_def R_R15x(SOC, SOC, Op_RegI, 255, R15->as_VMReg()->next()); + +reg_def R_R16 (SOC, SOC, Op_RegI, 16, R16->as_VMReg()); // IP0 
+reg_def R_R16x(SOC, SOC, Op_RegI, 255, R16->as_VMReg()->next()); +reg_def R_R17 (SOC, SOC, Op_RegI, 17, R17->as_VMReg()); // IP1 +reg_def R_R17x(SOC, SOC, Op_RegI, 255, R17->as_VMReg()->next()); +reg_def R_R18 (SOC, SOC, Op_RegI, 18, R18->as_VMReg()); // Platform Register +reg_def R_R18x(SOC, SOC, Op_RegI, 255, R18->as_VMReg()->next()); + +reg_def R_R19 (SOC, SOE, Op_RegI, 19, R19->as_VMReg()); +reg_def R_R19x(SOC, SOE, Op_RegI, 255, R19->as_VMReg()->next()); +reg_def R_R20 (SOC, SOE, Op_RegI, 20, R20->as_VMReg()); +reg_def R_R20x(SOC, SOE, Op_RegI, 255, R20->as_VMReg()->next()); +reg_def R_R21 (SOC, SOE, Op_RegI, 21, R21->as_VMReg()); +reg_def R_R21x(SOC, SOE, Op_RegI, 255, R21->as_VMReg()->next()); +reg_def R_R22 (SOC, SOE, Op_RegI, 22, R22->as_VMReg()); +reg_def R_R22x(SOC, SOE, Op_RegI, 255, R22->as_VMReg()->next()); +reg_def R_R23 (SOC, SOE, Op_RegI, 23, R23->as_VMReg()); +reg_def R_R23x(SOC, SOE, Op_RegI, 255, R23->as_VMReg()->next()); +reg_def R_R24 (SOC, SOE, Op_RegI, 24, R24->as_VMReg()); +reg_def R_R24x(SOC, SOE, Op_RegI, 255, R24->as_VMReg()->next()); +reg_def R_R25 (SOC, SOE, Op_RegI, 25, R25->as_VMReg()); +reg_def R_R25x(SOC, SOE, Op_RegI, 255, R25->as_VMReg()->next()); +reg_def R_R26 (SOC, SOE, Op_RegI, 26, R26->as_VMReg()); +reg_def R_R26x(SOC, SOE, Op_RegI, 255, R26->as_VMReg()->next()); +reg_def R_R27 (SOC, SOE, Op_RegI, 27, R27->as_VMReg()); // Rheap_base +reg_def R_R27x(SOC, SOE, Op_RegI, 255, R27->as_VMReg()->next()); // Rheap_base +reg_def R_R28 ( NS, SOE, Op_RegI, 28, R28->as_VMReg()); // TLS +reg_def R_R28x( NS, SOE, Op_RegI, 255, R28->as_VMReg()->next()); // TLS + +reg_def R_R29 ( NS, SOE, Op_RegI, 29, R29->as_VMReg()); // FP +reg_def R_R29x( NS, SOE, Op_RegI, 255, R29->as_VMReg()->next()); // FP +reg_def R_R30 (SOC, SOC, Op_RegI, 30, R30->as_VMReg()); // LR +reg_def R_R30x(SOC, SOC, Op_RegI, 255, R30->as_VMReg()->next()); // LR + +reg_def R_ZR ( NS, NS, Op_RegI, 31, ZR->as_VMReg()); // ZR +reg_def R_ZRx( NS, NS, Op_RegI, 255, 
ZR->as_VMReg()->next()); // ZR + +// FIXME +//reg_def R_SP ( NS, NS, Op_RegP, 32, SP->as_VMReg()); +reg_def R_SP ( NS, NS, Op_RegI, 32, SP->as_VMReg()); +//reg_def R_SPx( NS, NS, Op_RegP, 255, SP->as_VMReg()->next()); +reg_def R_SPx( NS, NS, Op_RegI, 255, SP->as_VMReg()->next()); + +// ---------------------------- +// Float/Double/Vector Registers +// ---------------------------- + +reg_def R_V0(SOC, SOC, Op_RegF, 0, V0->as_VMReg()); +reg_def R_V1(SOC, SOC, Op_RegF, 1, V1->as_VMReg()); +reg_def R_V2(SOC, SOC, Op_RegF, 2, V2->as_VMReg()); +reg_def R_V3(SOC, SOC, Op_RegF, 3, V3->as_VMReg()); +reg_def R_V4(SOC, SOC, Op_RegF, 4, V4->as_VMReg()); +reg_def R_V5(SOC, SOC, Op_RegF, 5, V5->as_VMReg()); +reg_def R_V6(SOC, SOC, Op_RegF, 6, V6->as_VMReg()); +reg_def R_V7(SOC, SOC, Op_RegF, 7, V7->as_VMReg()); +reg_def R_V8(SOC, SOC, Op_RegF, 8, V8->as_VMReg()); +reg_def R_V9(SOC, SOC, Op_RegF, 9, V9->as_VMReg()); +reg_def R_V10(SOC, SOC, Op_RegF, 10, V10->as_VMReg()); +reg_def R_V11(SOC, SOC, Op_RegF, 11, V11->as_VMReg()); +reg_def R_V12(SOC, SOC, Op_RegF, 12, V12->as_VMReg()); +reg_def R_V13(SOC, SOC, Op_RegF, 13, V13->as_VMReg()); +reg_def R_V14(SOC, SOC, Op_RegF, 14, V14->as_VMReg()); +reg_def R_V15(SOC, SOC, Op_RegF, 15, V15->as_VMReg()); +reg_def R_V16(SOC, SOC, Op_RegF, 16, V16->as_VMReg()); +reg_def R_V17(SOC, SOC, Op_RegF, 17, V17->as_VMReg()); +reg_def R_V18(SOC, SOC, Op_RegF, 18, V18->as_VMReg()); +reg_def R_V19(SOC, SOC, Op_RegF, 19, V19->as_VMReg()); +reg_def R_V20(SOC, SOC, Op_RegF, 20, V20->as_VMReg()); +reg_def R_V21(SOC, SOC, Op_RegF, 21, V21->as_VMReg()); +reg_def R_V22(SOC, SOC, Op_RegF, 22, V22->as_VMReg()); +reg_def R_V23(SOC, SOC, Op_RegF, 23, V23->as_VMReg()); +reg_def R_V24(SOC, SOC, Op_RegF, 24, V24->as_VMReg()); +reg_def R_V25(SOC, SOC, Op_RegF, 25, V25->as_VMReg()); +reg_def R_V26(SOC, SOC, Op_RegF, 26, V26->as_VMReg()); +reg_def R_V27(SOC, SOC, Op_RegF, 27, V27->as_VMReg()); +reg_def R_V28(SOC, SOC, Op_RegF, 28, V28->as_VMReg()); +reg_def R_V29(SOC, 
SOC, Op_RegF, 29, V29->as_VMReg()); +reg_def R_V30(SOC, SOC, Op_RegF, 30, V30->as_VMReg()); +reg_def R_V31(SOC, SOC, Op_RegF, 31, V31->as_VMReg()); + +reg_def R_V0b(SOC, SOC, Op_RegF, 255, V0->as_VMReg()->next(1)); +reg_def R_V1b(SOC, SOC, Op_RegF, 255, V1->as_VMReg()->next(1)); +reg_def R_V2b(SOC, SOC, Op_RegF, 255, V2->as_VMReg()->next(1)); +reg_def R_V3b(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(1)); +reg_def R_V4b(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(1)); +reg_def R_V5b(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(1)); +reg_def R_V6b(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(1)); +reg_def R_V7b(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(1)); +reg_def R_V8b(SOC, SOC, Op_RegF, 255, V8->as_VMReg()->next(1)); +reg_def R_V9b(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(1)); +reg_def R_V10b(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(1)); +reg_def R_V11b(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(1)); +reg_def R_V12b(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(1)); +reg_def R_V13b(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(1)); +reg_def R_V14b(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(1)); +reg_def R_V15b(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(1)); +reg_def R_V16b(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(1)); +reg_def R_V17b(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(1)); +reg_def R_V18b(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(1)); +reg_def R_V19b(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(1)); +reg_def R_V20b(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(1)); +reg_def R_V21b(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(1)); +reg_def R_V22b(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(1)); +reg_def R_V23b(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(1)); +reg_def R_V24b(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(1)); +reg_def R_V25b(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(1)); +reg_def R_V26b(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(1)); +reg_def R_V27b(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(1)); +reg_def R_V28b(SOC, 
SOC, Op_RegF, 28, V28->as_VMReg()->next(1)); +reg_def R_V29b(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(1)); +reg_def R_V30b(SOC, SOC, Op_RegD, 30, V30->as_VMReg()->next(1)); +reg_def R_V31b(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(1)); + +reg_def R_V0c(SOC, SOC, Op_RegF, 0, V0->as_VMReg()->next(2)); +reg_def R_V1c(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(2)); +reg_def R_V2c(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(2)); +reg_def R_V3c(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(2)); +reg_def R_V4c(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(2)); +reg_def R_V5c(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(2)); +reg_def R_V6c(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(2)); +reg_def R_V7c(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(2)); +reg_def R_V8c(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(2)); +reg_def R_V9c(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(2)); +reg_def R_V10c(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(2)); +reg_def R_V11c(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(2)); +reg_def R_V12c(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(2)); +reg_def R_V13c(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(2)); +reg_def R_V14c(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(2)); +reg_def R_V15c(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(2)); +reg_def R_V16c(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(2)); +reg_def R_V17c(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(2)); +reg_def R_V18c(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(2)); +reg_def R_V19c(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(2)); +reg_def R_V20c(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(2)); +reg_def R_V21c(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(2)); +reg_def R_V22c(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(2)); +reg_def R_V23c(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(2)); +reg_def R_V24c(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(2)); +reg_def R_V25c(SOC, SOC, Op_RegF, 25, V25->as_VMReg()->next(2)); +reg_def R_V26c(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(2)); 
+reg_def R_V27c(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(2)); +reg_def R_V28c(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(2)); +reg_def R_V29c(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(2)); +reg_def R_V30c(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(2)); +reg_def R_V31c(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(2)); + +reg_def R_V0d(SOC, SOC, Op_RegF, 0, V0->as_VMReg()->next(3)); +reg_def R_V1d(SOC, SOC, Op_RegF, 1, V1->as_VMReg()->next(3)); +reg_def R_V2d(SOC, SOC, Op_RegF, 2, V2->as_VMReg()->next(3)); +reg_def R_V3d(SOC, SOC, Op_RegF, 3, V3->as_VMReg()->next(3)); +reg_def R_V4d(SOC, SOC, Op_RegF, 4, V4->as_VMReg()->next(3)); +reg_def R_V5d(SOC, SOC, Op_RegF, 5, V5->as_VMReg()->next(3)); +reg_def R_V6d(SOC, SOC, Op_RegF, 6, V6->as_VMReg()->next(3)); +reg_def R_V7d(SOC, SOC, Op_RegF, 7, V7->as_VMReg()->next(3)); +reg_def R_V8d(SOC, SOC, Op_RegF, 8, V8->as_VMReg()->next(3)); +reg_def R_V9d(SOC, SOC, Op_RegF, 9, V9->as_VMReg()->next(3)); +reg_def R_V10d(SOC, SOC, Op_RegF, 10, V10->as_VMReg()->next(3)); +reg_def R_V11d(SOC, SOC, Op_RegF, 11, V11->as_VMReg()->next(3)); +reg_def R_V12d(SOC, SOC, Op_RegF, 12, V12->as_VMReg()->next(3)); +reg_def R_V13d(SOC, SOC, Op_RegF, 13, V13->as_VMReg()->next(3)); +reg_def R_V14d(SOC, SOC, Op_RegF, 14, V14->as_VMReg()->next(3)); +reg_def R_V15d(SOC, SOC, Op_RegF, 15, V15->as_VMReg()->next(3)); +reg_def R_V16d(SOC, SOC, Op_RegF, 16, V16->as_VMReg()->next(3)); +reg_def R_V17d(SOC, SOC, Op_RegF, 17, V17->as_VMReg()->next(3)); +reg_def R_V18d(SOC, SOC, Op_RegF, 18, V18->as_VMReg()->next(3)); +reg_def R_V19d(SOC, SOC, Op_RegF, 19, V19->as_VMReg()->next(3)); +reg_def R_V20d(SOC, SOC, Op_RegF, 20, V20->as_VMReg()->next(3)); +reg_def R_V21d(SOC, SOC, Op_RegF, 21, V21->as_VMReg()->next(3)); +reg_def R_V22d(SOC, SOC, Op_RegF, 22, V22->as_VMReg()->next(3)); +reg_def R_V23d(SOC, SOC, Op_RegF, 23, V23->as_VMReg()->next(3)); +reg_def R_V24d(SOC, SOC, Op_RegF, 24, V24->as_VMReg()->next(3)); +reg_def R_V25d(SOC, SOC, Op_RegF, 25, 
V25->as_VMReg()->next(3)); +reg_def R_V26d(SOC, SOC, Op_RegF, 26, V26->as_VMReg()->next(3)); +reg_def R_V27d(SOC, SOC, Op_RegF, 27, V27->as_VMReg()->next(3)); +reg_def R_V28d(SOC, SOC, Op_RegF, 28, V28->as_VMReg()->next(3)); +reg_def R_V29d(SOC, SOC, Op_RegF, 29, V29->as_VMReg()->next(3)); +reg_def R_V30d(SOC, SOC, Op_RegF, 30, V30->as_VMReg()->next(3)); +reg_def R_V31d(SOC, SOC, Op_RegF, 31, V31->as_VMReg()->next(3)); + +// ---------------------------- +// Special Registers +// Condition Codes Flag Registers +reg_def APSR (SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad()); +reg_def FPSCR(SOC, SOC, Op_RegFlags, 255, VMRegImpl::Bad()); + +// ---------------------------- +// Specify the enum values for the registers. These enums are only used by the +// OptoReg "class". We can convert these enum values at will to VMReg when needed +// for visibility to the rest of the vm. The order of this enum influences the +// register allocator so having the freedom to set this order and not be stuck +// with the order that is natural for the rest of the vm is worth it. + +// Quad vector must be aligned here, so list them first. 
+alloc_class fprs( + R_V8, R_V8b, R_V8c, R_V8d, R_V9, R_V9b, R_V9c, R_V9d, + R_V10, R_V10b, R_V10c, R_V10d, R_V11, R_V11b, R_V11c, R_V11d, + R_V12, R_V12b, R_V12c, R_V12d, R_V13, R_V13b, R_V13c, R_V13d, + R_V14, R_V14b, R_V14c, R_V14d, R_V15, R_V15b, R_V15c, R_V15d, + R_V16, R_V16b, R_V16c, R_V16d, R_V17, R_V17b, R_V17c, R_V17d, + R_V18, R_V18b, R_V18c, R_V18d, R_V19, R_V19b, R_V19c, R_V19d, + R_V20, R_V20b, R_V20c, R_V20d, R_V21, R_V21b, R_V21c, R_V21d, + R_V22, R_V22b, R_V22c, R_V22d, R_V23, R_V23b, R_V23c, R_V23d, + R_V24, R_V24b, R_V24c, R_V24d, R_V25, R_V25b, R_V25c, R_V25d, + R_V26, R_V26b, R_V26c, R_V26d, R_V27, R_V27b, R_V27c, R_V27d, + R_V28, R_V28b, R_V28c, R_V28d, R_V29, R_V29b, R_V29c, R_V29d, + R_V30, R_V30b, R_V30c, R_V30d, R_V31, R_V31b, R_V31c, R_V31d, + R_V0, R_V0b, R_V0c, R_V0d, R_V1, R_V1b, R_V1c, R_V1d, + R_V2, R_V2b, R_V2c, R_V2d, R_V3, R_V3b, R_V3c, R_V3d, + R_V4, R_V4b, R_V4c, R_V4d, R_V5, R_V5b, R_V5c, R_V5d, + R_V6, R_V6b, R_V6c, R_V6d, R_V7, R_V7b, R_V7c, R_V7d +); + +// Need double-register alignment here. +// We are already quad-register aligned because of vectors above. +alloc_class gprs( + R_R0, R_R0x, R_R1, R_R1x, R_R2, R_R2x, R_R3, R_R3x, + R_R4, R_R4x, R_R5, R_R5x, R_R6, R_R6x, R_R7, R_R7x, + R_R8, R_R8x, R_R9, R_R9x, R_R10, R_R10x, R_R11, R_R11x, + R_R12, R_R12x, R_R13, R_R13x, R_R14, R_R14x, R_R15, R_R15x, + R_R16, R_R16x, R_R17, R_R17x, R_R18, R_R18x, R_R19, R_R19x, + R_R20, R_R20x, R_R21, R_R21x, R_R22, R_R22x, R_R23, R_R23x, + R_R24, R_R24x, R_R25, R_R25x, R_R26, R_R26x, R_R27, R_R27x, + R_R28, R_R28x, R_R29, R_R29x, R_R30, R_R30x +); +// Continuing with double-register alignment... +alloc_class chunk2(APSR, FPSCR); +alloc_class chunk3(R_SP, R_SPx); +alloc_class chunk4(R_ZR, R_ZRx); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. 
+// 1) reg_class inline_cache_reg ( as defined in frame section ) +// 2) reg_class interpreter_method_oop_reg ( as defined in frame section ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// ---------------------------- +// Integer Register Classes +// ---------------------------- +reg_class int_reg_all(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, + R_R8, R_R9, R_R10, R_R11, R_R12, R_R13, R_R14, R_R15, + R_R16, R_R17, R_R18, R_R19, R_R20, R_R21, R_R22, R_R23, + R_R24, R_R25, R_R26, R_R27, R_R28, R_R29, R_R30 +); + +// Exclusions from i_reg: +// SP (R31) +// Rthread/R28: reserved by HotSpot to the TLS register (invariant within Java) +reg_class int_reg %{ + return _INT_REG_mask; +%} +reg_class ptr_reg %{ + return _PTR_REG_mask; +%} +reg_class vectorx_reg %{ + return _VECTORX_REG_mask; +%} + +reg_class R0_regI(R_R0); +reg_class R1_regI(R_R1); +reg_class R2_regI(R_R2); +reg_class R3_regI(R_R3); +//reg_class R12_regI(R_R12); + +// ---------------------------- +// Pointer Register Classes +// ---------------------------- + +// Special class for storeP instructions, which can store SP or RPC to TLS. +// It is also used for memory addressing, allowing direct TLS addressing. 
+ +reg_class sp_ptr_reg %{ + return _SP_PTR_REG_mask; +%} + +reg_class store_reg %{ + return _STR_REG_mask; +%} + +reg_class store_ptr_reg %{ + return _STR_PTR_REG_mask; +%} + +reg_class spillP_reg %{ + return _SPILLP_REG_mask; +%} + +// Other special pointer regs +reg_class R0_regP(R_R0, R_R0x); +reg_class R1_regP(R_R1, R_R1x); +reg_class R2_regP(R_R2, R_R2x); +reg_class Rexception_regP(R_R19, R_R19x); +reg_class Ricklass_regP(R_R8, R_R8x); +reg_class Rmethod_regP(R_R27, R_R27x); + +reg_class Rthread_regP(R_R28, R_R28x); +reg_class IP_regP(R_R16, R_R16x); +#define RtempRegP IPRegP +reg_class LR_regP(R_R30, R_R30x); + +reg_class SP_regP(R_SP, R_SPx); +reg_class FP_regP(R_R29, R_R29x); + +reg_class ZR_regP(R_ZR, R_ZRx); +reg_class ZR_regI(R_ZR); + +// ---------------------------- +// Long Register Classes +// ---------------------------- +reg_class long_reg %{ return _PTR_REG_mask; %} +// for ldrexd, strexd: first reg of pair must be even +reg_class long_reg_align %{ return LONG_REG_mask(); %} + +reg_class R0_regL(R_R0,R_R0x); // arg 1 or return value + +// ---------------------------- +// Special Class for Condition Code Flags Register +reg_class int_flags(APSR); +reg_class float_flags(FPSCR); + + +// ---------------------------- +// Float Point Register Classes +// ---------------------------- +reg_class sflt_reg_0( + R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7, + R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15, + R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23, + R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, R_V31); + +reg_class sflt_reg %{ + return _SFLT_REG_mask; +%} + +reg_class dflt_low_reg %{ + return _DFLT_REG_mask; +%} + +reg_class actual_dflt_reg %{ + return _DFLT_REG_mask; +%} + +reg_class vectorx_reg_0( + R_V0, R_V1, R_V2, R_V3, R_V4, R_V5, R_V6, R_V7, + R_V8, R_V9, R_V10, R_V11, R_V12, R_V13, R_V14, R_V15, + R_V16, R_V17, R_V18, R_V19, R_V20, R_V21, R_V22, R_V23, + R_V24, R_V25, R_V26, R_V27, R_V28, R_V29, R_V30, /*R_V31,*/ + R_V0b, 
R_V1b, R_V2b, R_V3b, R_V4b, R_V5b, R_V6b, R_V7b, + R_V8b, R_V9b, R_V10b, R_V11b, R_V12b, R_V13b, R_V14b, R_V15b, + R_V16b, R_V17b, R_V18b, R_V19b, R_V20b, R_V21b, R_V22b, R_V23b, + R_V24b, R_V25b, R_V26b, R_V27b, R_V28b, R_V29b, R_V30b, /*R_V31b,*/ + R_V0c, R_V1c, R_V2c, R_V3c, R_V4c, R_V5c, R_V6c, R_V7c, + R_V8c, R_V9c, R_V10c, R_V11c, R_V12c, R_V13c, R_V14c, R_V15c, + R_V16c, R_V17c, R_V18c, R_V19c, R_V20c, R_V21c, R_V22c, R_V23c, + R_V24c, R_V25c, R_V26c, R_V27c, R_V28c, R_V29c, R_V30c, /*R_V31c,*/ + R_V0d, R_V1d, R_V2d, R_V3d, R_V4d, R_V5d, R_V6d, R_V7d, + R_V8d, R_V9d, R_V10d, R_V11d, R_V12d, R_V13d, R_V14d, R_V15d, + R_V16d, R_V17d, R_V18d, R_V19d, R_V20d, R_V21d, R_V22d, R_V23d, + R_V24d, R_V25d, R_V26d, R_V27d, R_V28d, R_V29d, R_V30d, /*R_V31d*/); + +reg_class Rmemcopy_reg %{ + return _RMEMCOPY_REG_mask; +%} + +%} + +source_hpp %{ + +const MachRegisterNumbers R_mem_copy_lo_num = R_V31_num; +const MachRegisterNumbers R_mem_copy_hi_num = R_V31b_num; +const FloatRegister Rmemcopy = V31; + +const MachRegisterNumbers R_hf_ret_lo_num = R_V0_num; +const MachRegisterNumbers R_hf_ret_hi_num = R_V0b_num; +const FloatRegister Rhfret = V0; + +extern OptoReg::Name R_Ricklass_num; +extern OptoReg::Name R_Rmethod_num; +extern OptoReg::Name R_tls_num; +extern OptoReg::Name R_Rheap_base_num; + +extern RegMask _INT_REG_mask; +extern RegMask _PTR_REG_mask; +extern RegMask _SFLT_REG_mask; +extern RegMask _DFLT_REG_mask; +extern RegMask _VECTORX_REG_mask; +extern RegMask _RMEMCOPY_REG_mask; +extern RegMask _SP_PTR_REG_mask; +extern RegMask _SPILLP_REG_mask; +extern RegMask _STR_REG_mask; +extern RegMask _STR_PTR_REG_mask; + +#define LDR_DOUBLE "LDR_D" +#define LDR_FLOAT "LDR_S" +#define STR_DOUBLE "STR_D" +#define STR_FLOAT "STR_S" +#define STR_64 "STR" +#define LDR_64 "LDR" +#define STR_32 "STR_W" +#define LDR_32 "LDR_W" +#define MOV_DOUBLE "FMOV_D" +#define MOV_FLOAT "FMOV_S" +#define FMSR "FMOV_SW" +#define FMRS "FMOV_WS" +#define LDREX "ldxr " +#define STREX "stxr " + 
+#define str_64 str +#define ldr_64 ldr +#define ldr_32 ldr_w +#define ldrex ldxr +#define strex stxr + +#define fmsr fmov_sw +#define fmrs fmov_ws +#define fconsts fmov_s +#define fconstd fmov_d + +static inline bool is_uimm12(jlong imm, int shift) { + return Assembler::is_unsigned_imm_in_range(imm, 12, shift); +} + +static inline bool is_memoryD(int offset) { + int scale = 3; // LogBytesPerDouble + return is_uimm12(offset, scale); +} + +static inline bool is_memoryfp(int offset) { + int scale = LogBytesPerInt; // include 32-bit word accesses + return is_uimm12(offset, scale); +} + +static inline bool is_memoryI(int offset) { + int scale = LogBytesPerInt; + return is_uimm12(offset, scale); +} + +static inline bool is_memoryP(int offset) { + int scale = LogBytesPerWord; + return is_uimm12(offset, scale); +} + +static inline bool is_memoryHD(int offset) { + int scale = LogBytesPerInt; // include 32-bit word accesses + return is_uimm12(offset, scale); +} + +uintx limmL_low(uintx imm, int n); + +static inline bool Xis_aimm(int imm) { + return Assembler::ArithmeticImmediate(imm).is_encoded(); +} + +static inline bool is_aimm(intptr_t imm) { + return Assembler::ArithmeticImmediate(imm).is_encoded(); +} + +static inline bool is_limmL(uintptr_t imm) { + return Assembler::LogicalImmediate(imm).is_encoded(); +} + +static inline bool is_limmL_low(intptr_t imm, int n) { + return is_limmL(limmL_low(imm, n)); +} + +static inline bool is_limmI(jint imm) { + return Assembler::LogicalImmediate(imm, true).is_encoded(); +} + +static inline uintx limmI_low(jint imm, int n) { + return limmL_low(imm, n); +} + +static inline bool is_limmI_low(jint imm, int n) { + return is_limmL_low(imm, n); +} + +%} + +source %{ + +// Given a register encoding, produce an Integer Register object +static Register reg_to_register_object(int register_encoding) { + assert(R0->encoding() == R_R0_enc && R30->encoding() == R_R30_enc, "right coding"); + assert(Rthread->encoding() == R_R28_enc, "right coding"); 
+ assert(SP->encoding() == R_SP_enc, "right coding"); + return as_Register(register_encoding); +} + +// Given a register encoding, produce a single-precision Float Register object +static FloatRegister reg_to_FloatRegister_object(int register_encoding) { + assert(V0->encoding() == R_V0_enc && V31->encoding() == R_V31_enc, "right coding"); + return as_FloatRegister(register_encoding); +} + +RegMask _INT_REG_mask; +RegMask _PTR_REG_mask; +RegMask _SFLT_REG_mask; +RegMask _DFLT_REG_mask; +RegMask _VECTORX_REG_mask; +RegMask _RMEMCOPY_REG_mask; +RegMask _SP_PTR_REG_mask; +RegMask _SPILLP_REG_mask; +RegMask _STR_REG_mask; +RegMask _STR_PTR_REG_mask; + +OptoReg::Name R_Ricklass_num = -1; +OptoReg::Name R_Rmethod_num = -1; +OptoReg::Name R_tls_num = -1; +OptoReg::Name R_Rtemp_num = -1; +OptoReg::Name R_Rheap_base_num = -1; + +static int mov_oop_size = -1; + +#ifdef ASSERT +static bool same_mask(const RegMask &a, const RegMask &b) { + RegMask a_sub_b = a; a_sub_b.SUBTRACT(b); + RegMask b_sub_a = b; b_sub_a.SUBTRACT(a); + return a_sub_b.Size() == 0 && b_sub_a.Size() == 0; +} +#endif + +void Compile::pd_compiler2_init() { + + R_Ricklass_num = OptoReg::as_OptoReg(Ricklass->as_VMReg()); + R_Rmethod_num = OptoReg::as_OptoReg(Rmethod->as_VMReg()); + R_tls_num = OptoReg::as_OptoReg(Rthread->as_VMReg()); + R_Rtemp_num = OptoReg::as_OptoReg(Rtemp->as_VMReg()); + R_Rheap_base_num = OptoReg::as_OptoReg(Rheap_base->as_VMReg()); + + _INT_REG_mask = _INT_REG_ALL_mask; + _INT_REG_mask.Remove(R_tls_num); + _INT_REG_mask.Remove(R_SP_num); + if (UseCompressedOops) { + _INT_REG_mask.Remove(R_Rheap_base_num); + } + // Remove Rtemp because safepoint poll can trash it + // (see SharedRuntime::generate_handler_blob) + _INT_REG_mask.Remove(R_Rtemp_num); + + _PTR_REG_mask = _INT_REG_mask; + _PTR_REG_mask.smear_to_sets(2); + + // STR_REG = INT_REG+ZR + // SPILLP_REG = INT_REG+SP + // SP_PTR_REG = INT_REG+SP+TLS + _STR_REG_mask = _INT_REG_mask; + _SP_PTR_REG_mask = _STR_REG_mask; + 
_STR_REG_mask.Insert(R_ZR_num); + _SP_PTR_REG_mask.Insert(R_SP_num); + _SPILLP_REG_mask = _SP_PTR_REG_mask; + _SP_PTR_REG_mask.Insert(R_tls_num); + _STR_PTR_REG_mask = _STR_REG_mask; + _STR_PTR_REG_mask.smear_to_sets(2); + _SP_PTR_REG_mask.smear_to_sets(2); + _SPILLP_REG_mask.smear_to_sets(2); + + _RMEMCOPY_REG_mask = RegMask(R_mem_copy_lo_num); +assert(OptoReg::as_OptoReg(Rmemcopy->as_VMReg()) == R_mem_copy_lo_num, "!"); + + _SFLT_REG_mask = _SFLT_REG_0_mask; + _SFLT_REG_mask.SUBTRACT(_RMEMCOPY_REG_mask); + _DFLT_REG_mask = _SFLT_REG_mask; + _DFLT_REG_mask.smear_to_sets(2); + _VECTORX_REG_mask = _SFLT_REG_mask; + _VECTORX_REG_mask.smear_to_sets(4); + assert(same_mask(_VECTORX_REG_mask, _VECTORX_REG_0_mask), "!"); + +#ifdef ASSERT + RegMask r((RegMask *)&SFLT_REG_mask()); + r.smear_to_sets(2); + assert(same_mask(r, _DFLT_REG_mask), "!"); +#endif + + if (VM_Version::prefer_moves_over_load_literal()) { + mov_oop_size = 4; + } else { + mov_oop_size = 1; + } + + assert(Matcher::interpreter_method_oop_reg_encode() == Rmethod->encoding(), "should be"); +} + +uintx limmL_low(uintx imm, int n) { + // 1: try as is + if (is_limmL(imm)) { + return imm; + } + // 2: try low bits + all 0's + uintx imm0 = imm & right_n_bits(n); + if (is_limmL(imm0)) { + return imm0; + } + // 3: try low bits + all 1's + uintx imm1 = imm0 | left_n_bits(BitsPerWord - n); + if (is_limmL(imm1)) { + return imm1; + } +#if 0 + // 4: try low bits replicated + int field = 1 << log2_intptr(n + n - 1); + assert(field >= n, "!"); + assert(field / n == 1, "!"); + intptr_t immr = immx; + while (field < BitsPerWord) { + intrptr_t bits = immr & right_n_bits(field); + immr = bits | (bits << field); + field = field << 1; + } + // replicate at power-of-2 boundary + if (is_limmL(immr)) { + return immr; + } +#endif + return imm; +} + +// Convert the raw encoding form into the form expected by the +// constructor for Address. 
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { + RelocationHolder rspec; + if (disp_reloc != relocInfo::none) { + rspec = Relocation::spec_simple(disp_reloc); + } + + Register rbase = (base == 0xff) ? SP : as_Register(base); + if (index != 0xff) { + Register rindex = as_Register(index); + if (disp == 0x7fffffff) { // special value to indicate sign-extend + Address madr(rbase, rindex, ex_sxtw, scale); + madr._rspec = rspec; + return madr; + } else { + assert(disp == 0, "unsupported"); + Address madr(rbase, rindex, ex_lsl, scale); + madr._rspec = rspec; + return madr; + } + } else { + assert(scale == 0, "not supported"); + Address madr(rbase, disp); + madr._rspec = rspec; + return madr; + } +} + +// Location of compiled Java return values. Same as C +OptoRegPair c2::return_value(int ideal_reg) { + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_hf_ret_lo_num, R_hf_ret_lo_num, R_R0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, R_R0x_num, OptoReg::Bad, R_hf_ret_hi_num, R_R0x_num }; + return OptoRegPair( hi[ideal_reg], lo[ideal_reg]); +} + +// !!!!! Special hack to get all types of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. + +int MachCallStaticJavaNode::ret_addr_offset() { + bool far = (_method == NULL) ? maybe_far_call(this) : !cache_reachable(); + bool patchable = _method != NULL; + int call_size = MacroAssembler::call_size(entry_point(), far, patchable); + return (call_size + (_method_handle_invoke ? 
1 : 0)) * NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + bool far = !cache_reachable(); + int call_size = MacroAssembler::call_size(entry_point(), far, true); + return (mov_oop_size + call_size) * NativeInstruction::instruction_size; +} + +int MachCallRuntimeNode::ret_addr_offset() { + int call_size = 0; + // TODO: check if Leaf nodes also need this + if (!is_MachCallLeaf()) { + // adr $temp, ret_addr + // str $temp, [SP + last_java_pc] + call_size += 2; + } + // bl or mov_slow; blr + bool far = maybe_far_call(this); + call_size += MacroAssembler::call_size(entry_point(), far, false); + return call_size * NativeInstruction::instruction_size; +} + +%} + +// The intptr_t operand types, defined by textual substitution. +// (Cf. opto/type.hpp. This lets us avoid many, many other ifdefs.) +#define immX immL +#define iRegX iRegL +#define aimmX aimmL +#define limmX limmL +#define immX9 immL9 +#define LShiftX LShiftL +#define shimmX immU6 + +#define store_RegLd store_RegL + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +//----------Simple Operands---------------------------------------------------- +// Immediate Operands + +// Integer Immediate: 9-bit (including sign bit), so same as immI8? +// FIXME: simm9 allows -256, but immI8 doesn't... 
+operand simm9() %{ + predicate(Assembler::is_imm_in_range(n->get_int(), 9, 0)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + + +operand uimm12() %{ + predicate(Assembler::is_unsigned_imm_in_range(n->get_int(), 12, 0)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand aimmP() %{ + predicate(n->get_ptr() == 0 || (is_aimm(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none)); + match(ConP); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 12-bit - for addressing mode +operand immL12() %{ + predicate((-4096 < n->get_long()) && (n->get_long() < 4096)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 9-bit - for addressing mode +operand immL9() %{ + predicate((-256 <= n->get_long()) && (n->get_long() < 256)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immIMov() %{ + predicate(n->get_int() >> 16 == 0); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immLMov() %{ + predicate(n->get_long() >> 16 == 0); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immUL12() %{ + predicate(is_uimm12(n->get_long(), 0)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immUL12x2() %{ + predicate(is_uimm12(n->get_long(), 1)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immUL12x4() %{ + predicate(is_uimm12(n->get_long(), 2)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immUL12x8() %{ + predicate(is_uimm12(n->get_long(), 3)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immUL12x16() %{ + predicate(is_uimm12(n->get_long(), 4)); + match(ConL); + op_cost(0); + + format %{ 
%} + interface(CONST_INTER); +%} + +// Used for long shift +operand immU6() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 63)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Used for register extended shift +operand immI_0_4() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 4)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Compressed Pointer Register +operand iRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + match(ZRRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand SPRegP() %{ + constraint(ALLOC_IN_RC(SP_regP)); + match(RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand ZRRegP() %{ + constraint(ALLOC_IN_RC(ZR_regP)); + match(RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand ZRRegL() %{ + constraint(ALLOC_IN_RC(ZR_regP)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand ZRRegI() %{ + constraint(ALLOC_IN_RC(ZR_regI)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand ZRRegN() %{ + constraint(ALLOC_IN_RC(ZR_regI)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm.cpp 2016-12-02 11:17:09.337628395 -0500 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "ci/ciEnv.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/hashtable.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +int AbstractAssembler::code_fill_byte() { + return 0xff; // illegal instruction 0xffffffff +} + +#ifdef ASSERT +bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm.hpp 2016-12-02 11:17:14.429917164 -0500 @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_ASSEMBLER_ARM_HPP +#define CPU_ARM_VM_ASSEMBLER_ARM_HPP + +#include "utilities/macros.hpp" + +enum AsmCondition { + eq, ne, cs, cc, mi, pl, vs, vc, + hi, ls, ge, lt, gt, le, al, nv, + number_of_conditions, + // alternative names + hs = cs, + lo = cc +}; + +enum AsmShift { + lsl, lsr, asr, ror +}; + +#ifdef AARCH64 +enum AsmExtendOp { + ex_uxtb, ex_uxth, ex_uxtw, ex_uxtx, + ex_sxtb, ex_sxth, ex_sxtw, ex_sxtx, + + ex_lsl = ex_uxtx +}; +#endif + +enum AsmOffset { +#ifdef AARCH64 + basic_offset = 0b00, + pre_indexed = 0b11, + post_indexed = 0b01 +#else + basic_offset = 1 << 24, + pre_indexed = 1 << 24 | 1 << 21, + post_indexed = 0 +#endif +}; + + +#ifndef AARCH64 +enum AsmWriteback { + no_writeback, + writeback +}; + +enum AsmOffsetOp { + sub_offset = 0, + add_offset = 1 +}; +#endif + + +// ARM Addressing Modes 2 and 3 - Load and store +class Address VALUE_OBJ_CLASS_SPEC { + private: + Register _base; + Register _index; + int _disp; + AsmOffset _mode; + RelocationHolder _rspec; + int _shift_imm; +#ifdef AARCH64 + AsmExtendOp _extend; 
+#else + AsmShift _shift; + AsmOffsetOp _offset_op; + + static inline int abs(int x) { return x < 0 ? -x : x; } + static inline int up (int x) { return x < 0 ? 0 : 1; } +#endif + +#ifdef AARCH64 + static const AsmExtendOp LSL = ex_lsl; +#else + static const AsmShift LSL = lsl; +#endif + + public: + Address() : _base(noreg) {} + + Address(Register rn, int offset = 0, AsmOffset mode = basic_offset) { + _base = rn; + _index = noreg; + _disp = offset; + _mode = mode; + _shift_imm = 0; +#ifdef AARCH64 + _extend = ex_lsl; +#else + _shift = lsl; + _offset_op = add_offset; +#endif + } + +#ifdef ASSERT + Address(Register rn, ByteSize offset, AsmOffset mode = basic_offset) { + _base = rn; + _index = noreg; + _disp = in_bytes(offset); + _mode = mode; + _shift_imm = 0; +#ifdef AARCH64 + _extend = ex_lsl; +#else + _shift = lsl; + _offset_op = add_offset; +#endif + } +#endif + +#ifdef AARCH64 + Address(Register rn, Register rm, AsmExtendOp extend = ex_lsl, int shift_imm = 0) { + assert ((extend == ex_uxtw) || (extend == ex_lsl) || (extend == ex_sxtw) || (extend == ex_sxtx), "invalid extend for address mode"); + assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range"); + _base = rn; + _index = rm; + _disp = 0; + _mode = basic_offset; + _extend = extend; + _shift_imm = shift_imm; + } +#else + Address(Register rn, Register rm, AsmShift shift = lsl, + int shift_imm = 0, AsmOffset mode = basic_offset, + AsmOffsetOp offset_op = add_offset) { + _base = rn; + _index = rm; + _disp = 0; + _shift = shift; + _shift_imm = shift_imm; + _mode = mode; + _offset_op = offset_op; + } + + Address(Register rn, RegisterOrConstant offset, AsmShift shift = lsl, + int shift_imm = 0) { + _base = rn; + if (offset.is_constant()) { + _index = noreg; + { + int off = (int) offset.as_constant(); + if (shift_imm != 0) { + assert(shift == lsl,"shift not yet encoded"); + off = off << shift_imm; + } + _disp = off; + } + _shift = lsl; + _shift_imm = 0; + } else { + _index = 
offset.as_register(); + _disp = 0; + _shift = shift; + _shift_imm = shift_imm; + } + _mode = basic_offset; + _offset_op = add_offset; + } +#endif // AARCH64 + + // [base + index * wordSize] + static Address indexed_ptr(Register base, Register index) { + return Address(base, index, LSL, LogBytesPerWord); + } + + // [base + index * BytesPerInt] + static Address indexed_32(Register base, Register index) { + return Address(base, index, LSL, LogBytesPerInt); + } + + // [base + index * BytesPerHeapOop] + static Address indexed_oop(Register base, Register index) { + return Address(base, index, LSL, LogBytesPerHeapOop); + } + + Address plus_disp(int disp) const { + assert((disp == 0) || (_index == noreg),"can't apply an offset to a register indexed address"); + Address a = (*this); + a._disp += disp; + return a; + } + + Address rebase(Register new_base) const { + Address a = (*this); + a._base = new_base; + return a; + } + +#ifdef AARCH64 + int encoding_simd() const { + assert(_index != SP, "encoding constraint"); + assert(_disp == 0 || _mode == post_indexed, "encoding constraint"); + assert(_index == noreg || _mode == basic_offset, "encoding constraint"); + assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint"); + assert(_extend == ex_lsl, "encoding constraint"); + int index; + if (_index == noreg) { + if (_mode == post_indexed) + index = 0b100 << 5 | 31; + else + index = 0; + } else { + index = 0b100 << 5 | _index->encoding(); + } + return index << 16 | _base->encoding_with_sp() << 5; + } +#else /* !AARCH64 */ + int encoding2() const { + assert(_mode == basic_offset || _base != PC, "unpredictable instruction"); + if (_index == noreg) { + assert(-4096 < _disp && _disp < 4096, "encoding constraint"); + return _mode | up(_disp) << 23 | _base->encoding() << 16 | abs(_disp); + } else { + assert(_index != PC && (_mode == basic_offset || _index != _base), "unpredictable instruction"); + assert(_disp == 0 && (_shift_imm >> 5) == 0, "encoding constraint"); 
+ return 1 << 25 | _offset_op << 23 | _mode | _base->encoding() << 16 | + _shift_imm << 7 | _shift << 5 | _index->encoding(); + } + } + + int encoding3() const { + assert(_mode == basic_offset || _base != PC, "unpredictable instruction"); + if (_index == noreg) { + assert(-256 < _disp && _disp < 256, "encoding constraint"); + return _mode | up(_disp) << 23 | 1 << 22 | _base->encoding() << 16 | + (abs(_disp) & 0xf0) << 4 | abs(_disp) & 0x0f; + } else { + assert(_index != PC && (_mode == basic_offset || _index != _base), "unpredictable instruction"); + assert(_disp == 0 && _shift == lsl && _shift_imm == 0, "encoding constraint"); + return _mode | _offset_op << 23 | _base->encoding() << 16 | _index->encoding(); + } + } + + int encoding_ex() const { + assert(_index == noreg && _disp == 0 && _mode == basic_offset && + _base != PC, "encoding constraint"); + return _base->encoding() << 16; + } + + int encoding_vfp() const { + assert(_index == noreg && _mode == basic_offset, "encoding constraint"); + assert(-1024 < _disp && _disp < 1024 && (_disp & 3) == 0, "encoding constraint"); + return _base->encoding() << 16 | up(_disp) << 23 | abs(_disp) >> 2; + } + + int encoding_simd() const { + assert(_base != PC, "encoding constraint"); + assert(_index != PC && _index != SP, "encoding constraint"); + assert(_disp == 0, "encoding constraint"); + assert(_shift == 0, "encoding constraint"); + assert(_index == noreg || _mode == basic_offset, "encoding constraint"); + assert(_mode == basic_offset || _mode == post_indexed, "encoding constraint"); + int index; + if (_index == noreg) { + if (_mode == post_indexed) + index = 13; + else + index = 15; + } else { + index = _index->encoding(); + } + + return _base->encoding() << 16 | index; + } +#endif // !AARCH64 + + Register base() const { + return _base; + } + + Register index() const { + return _index; + } + + int disp() const { + return _disp; + } + + AsmOffset mode() const { + return _mode; + } + + int shift_imm() const { + return 
_shift_imm; + } + +#ifdef AARCH64 + AsmExtendOp extend() const { + return _extend; + } +#else + AsmShift shift() const { + return _shift; + } + + AsmOffsetOp offset_op() const { + return _offset_op; + } +#endif + + bool uses(Register reg) const { return _base == reg || _index == reg; } + + const relocInfo::relocType rtype() { return _rspec.type(); } + const RelocationHolder& rspec() { return _rspec; } + + // Convert the raw encoding form into the form expected by the + // constructor for Address. + static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); +}; + +#ifdef COMPILER2 +class VFP VALUE_OBJ_CLASS_SPEC { + // Helper classes to detect whether a floating point constant can be + // encoded in a fconstd or fconsts instruction + // The conversion from the imm8, 8 bit constant, to the floating + // point value encoding is done with either: + // for single precision: imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5):imm8<5:0>:Zeros(19) + // or + // for double precision: imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8):imm8<5:0>:Zeros(48) + + private: + class fpnum { + public: + virtual unsigned int f_hi4() const = 0; + virtual bool f_lo_is_null() const = 0; + virtual int e() const = 0; + virtual unsigned int s() const = 0; + + inline bool can_be_imm8() const { return e() >= -3 && e() <= 4 && f_lo_is_null(); } + inline unsigned char imm8() const { int v = (s() << 7) | (((e() - 1) & 0x7) << 4) | f_hi4(); assert((v >> 8) == 0, "overflow"); return v; } + }; + + public: + class float_num : public fpnum { + public: + float_num(float v) { + _num.val = v; + } + + virtual unsigned int f_hi4() const { return (_num.bits << 9) >> (19+9); } + virtual bool f_lo_is_null() const { return (_num.bits & ((1 << 19) - 1)) == 0; } + virtual int e() const { return ((_num.bits << 1) >> (23+1)) - 127; } + virtual unsigned int s() const { return _num.bits >> 31; } + + private: + union { + float val; + unsigned int bits; + } _num; + }; + + class double_num : 
public fpnum { + public: + double_num(double v) { + _num.val = v; + } + + virtual unsigned int f_hi4() const { return (_num.bits << 12) >> (48+12); } + virtual bool f_lo_is_null() const { return (_num.bits & ((1LL << 48) - 1)) == 0; } + virtual int e() const { return ((_num.bits << 1) >> (52+1)) - 1023; } + virtual unsigned int s() const { return _num.bits >> 63; } + + private: + union { + double val; + unsigned long long bits; + } _num; + }; +}; +#endif + +#ifdef AARCH64 +#include "assembler_arm_64.hpp" +#else +#include "assembler_arm_32.hpp" +#endif + + +#endif // CPU_ARM_VM_ASSEMBLER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm.inline.hpp 2016-12-02 11:17:19.250190509 -0500 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP +#define CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP + + +#endif // CPU_ARM_VM_ASSEMBLER_ARM_INLINE_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm_32.cpp 2016-12-02 11:17:24.370480866 -0500 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "ci/ciEnv.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/hashtable.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef COMPILER2 +// Convert the raw encoding form into the form expected by the +// constructor for Address. 
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {  // base/index are raw register numbers (converted with as_Register)
+  RelocationHolder rspec;                            // left default-constructed when disp_reloc is relocInfo::none
+  if (disp_reloc != relocInfo::none) {
+    rspec = Relocation::spec_simple(disp_reloc);
+  }
+
+  Register rindex = as_Register(index);
+  if (rindex != PC) {                                // a real index register: register-offset form, displacement must be 0
+    assert(disp == 0, "unsupported");
+    Address madr(as_Register(base), rindex, lsl, scale);  // scale is passed as the lsl amount (presumably base + (index << scale))
+    madr._rspec = rspec;
+    return madr;
+  } else {                                           // index == PC is treated as "no index": base + displacement form, scale must be 0
+    assert(scale == 0, "not supported");
+    Address madr(as_Register(base), disp);
+    madr._rspec = rspec;
+    return madr;
+  }
+}
+#endif
+
+void AsmOperand::initialize_rotated_imm(unsigned int imm) {  // sets _encoding to the immediate form (bit 25) of imm as an 8-bit payload plus rotation
+  for (int shift = 2; shift <= 24; shift += 2) {     // try each even position of an 8-bit window that does not wrap around bit 31
+    if ((imm & ~(0xffu << shift)) == 0) {            // unsigned mask: 0xff << 24 is not representable in a signed int
+      _encoding = 1 << 25 | (32 - shift) << 7 | imm >> shift;  // shift is even, so (32 - shift) << 7 == ((32 - shift) / 2) << 8
+      return;
+    }
+  }
+  assert((imm & 0x0ffffff0) == 0, "too complicated constant: %d (%x)", imm, imm);  // only bits 31:28 and 3:0 may remain set here
+  _encoding = 1 << 25 | 4 << 7 | imm >> 28 | imm << 4;  // payload is imm rotated left by 4 (bits 3:0 -> 7:4, bits 31:28 -> 3:0)
+}
+
+bool AsmOperand::is_rotated_imm(unsigned int imm) {  // true when imm fits in 8 bits or matches a form initialize_rotated_imm() accepts
+  if ((imm >> 8) == 0) {                             // plain 8-bit value, no rotation needed
+    return true;
+  }
+  for (int shift = 2; shift <= 24; shift += 2) {     // same window scan as initialize_rotated_imm()
+    if ((imm & ~(0xffu << shift)) == 0) {            // unsigned mask: 0xff << 24 is not representable in a signed int
+      return true;
+    }
+  }
+  if ((imm & 0x0ffffff0) == 0) {                     // window wrapping around the word: only bits 31:28 and 3:0 set
+    return true;
+  }
+  return false;
+}
--- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm_32.hpp 2016-12-02 11:17:29.858792094 -0500 @@ -0,0 +1,1245 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_ASSEMBLER_ARM_32_HPP +#define CPU_ARM_VM_ASSEMBLER_ARM_32_HPP + +// ARM Addressing Mode 1 - Data processing operands +class AsmOperand VALUE_OBJ_CLASS_SPEC { + private: + int _encoding; + + void initialize_rotated_imm(unsigned int imm); + + void encode(int imm_8) { + if ((imm_8 >> 8) == 0) { + _encoding = 1 << 25 | imm_8; // the most common case + } else { + initialize_rotated_imm((unsigned int)imm_8); // slow case + } + } + + void encode(Register rm, AsmShift shift, int shift_imm) { + assert((shift_imm >> 5) == 0, "encoding constraint"); + _encoding = shift_imm << 7 | shift << 5 | rm->encoding(); + } + + public: + + AsmOperand(Register reg) { + _encoding = reg->encoding(); + } + + AsmOperand(int imm_8) { + encode(imm_8); + } + +#ifdef ASSERT + AsmOperand(ByteSize bytesize_8) { + const int imm_8 = in_bytes(bytesize_8); + encode(imm_8); + } +#endif // ASSERT + + AsmOperand(Register rm, AsmShift shift, int shift_imm) { + encode(rm,shift,shift_imm); + } + + AsmOperand(Register rm, AsmShift shift, Register rs) { + assert(rm != PC && rs != PC, "unpredictable instruction"); + _encoding = rs->encoding() << 8 | shift << 5 | 1 << 4 | rm->encoding(); + } + + AsmOperand(RegisterOrConstant offset, AsmShift shift = lsl, int shift_imm = 0) { + if (offset.is_register()) { + encode(offset.as_register(), shift, shift_imm); + } else { + assert(shift == lsl,"shift type not yet encoded"); + int imm_8 = ((int)offset.as_constant()) << 
shift_imm; + encode(imm_8); + } + } + + int encoding() const { + return _encoding; + } + + bool is_immediate() const { + return _encoding & (1 << 25) ? true : false; + } + + Register base_register() const { + assert(!is_immediate(), "is_immediate, no base reg"); + return as_Register(_encoding & 15); + } + + static bool is_rotated_imm(unsigned int imm); +}; + + +// ARM Addressing Mode 4 - Load and store multiple +class RegisterSet VALUE_OBJ_CLASS_SPEC { + private: + int _encoding; + + RegisterSet(int encoding) { + _encoding = encoding; + } + + public: + + RegisterSet(Register reg) { + _encoding = 1 << reg->encoding(); + } + + RegisterSet() { + _encoding = 0; + } + + RegisterSet(Register first, Register last) { + assert(first < last, "encoding constraint"); + _encoding = (1 << (last->encoding() + 1)) - (1 << first->encoding()); + } + + friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) { + assert((set1._encoding & set2._encoding) == 0, + "encoding constraint"); + return RegisterSet(set1._encoding | set2._encoding); + } + + int encoding() const { + return _encoding; + } + + bool contains(Register reg) const { + return (_encoding & (1 << reg->encoding())) != 0; + } + + // number of registers in the set + int size() const { + int count = 0; + unsigned int remaining = (unsigned int) _encoding; + while (remaining != 0) { + if ((remaining & 1) != 0) count++; + remaining >>= 1; + } + return count; + } +}; + +#if R9_IS_SCRATCHED +#define R9ifScratched RegisterSet(R9) +#else +#define R9ifScratched RegisterSet() +#endif + +// ARM Addressing Mode 5 - Load and store multiple VFP registers +class FloatRegisterSet VALUE_OBJ_CLASS_SPEC { + private: + int _encoding; + + public: + + FloatRegisterSet(FloatRegister reg) { + if (reg->hi_bit() == 0) { + _encoding = reg->hi_bits() << 12 | reg->lo_bit() << 22 | 1; + } else { + assert (reg->lo_bit() == 0, "impossible encoding"); + _encoding = reg->hi_bits() << 12 | reg->hi_bit() << 22 | 1; + } + } + + 
FloatRegisterSet(FloatRegister first, int count) { + assert(count >= 1, "encoding constraint"); + if (first->hi_bit() == 0) { + _encoding = first->hi_bits() << 12 | first->lo_bit() << 22 | count; + } else { + assert (first->lo_bit() == 0, "impossible encoding"); + _encoding = first->hi_bits() << 12 | first->hi_bit() << 22 | count; + } + } + + int encoding_s() const { + return _encoding; + } + + int encoding_d() const { + assert((_encoding & 0xFF) <= 16, "no more than 16 double registers" ); + return (_encoding & 0xFFFFFF00) | ((_encoding & 0xFF) << 1); + } + +}; + + +class Assembler : public AbstractAssembler { + + public: + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + static inline AsmCondition inverse(AsmCondition cond) { + assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed"); + return (AsmCondition)((int)cond ^ 1); + } + + // Returns true if given value can be used as immediate in arithmetic (add/sub/cmp/cmn) instructions. 
+ static inline bool is_arith_imm_in_range(intx value) { + return AsmOperand::is_rotated_imm(value); + } + + // Arithmetic instructions + +#define F(mnemonic, opcode) \ + void mnemonic(Register rd, Register rn, AsmOperand operand, AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | rn->encoding() << 16 | \ + rd->encoding() << 12 | operand.encoding()); \ + } \ + void mnemonic##s(Register rd, Register rn, AsmOperand operand, AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rn->encoding() << 16 | \ + rd->encoding() << 12 | operand.encoding()); \ + } + + F(andr, 0) + F(eor, 1) + F(sub, 2) + F(rsb, 3) + F(add, 4) + F(adc, 5) + F(sbc, 6) + F(rsc, 7) + F(orr, 12) + F(bic, 14) +#undef F + +#define F(mnemonic, opcode) \ + void mnemonic(Register rn, AsmOperand operand, AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rn->encoding() << 16 | \ + operand.encoding()); \ + } + + F(tst, 8) + F(teq, 9) + F(cmp, 10) + F(cmn, 11) +#undef F + +#define F(mnemonic, opcode) \ + void mnemonic(Register rd, AsmOperand operand, AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | rd->encoding() << 12 | \ + operand.encoding()); \ + } \ + void mnemonic##s(Register rd, AsmOperand operand, AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rd->encoding() << 12 | \ + operand.encoding()); \ + } + + F(mov, 13) + F(mvn, 15) +#undef F + + void msr(uint fields, AsmOperand operand, AsmCondition cond = al) { + assert((operand.encoding() & (1<<25)) || ((operand.encoding() & 0xff0) == 0), "invalid addressing mode"); + emit_int32(cond << 28 | 1 << 24 | 1 << 21 | fields << 16 | 0xf << 12 | operand.encoding()); + } + + void mrs(uint fields, Register Rd, AsmCondition cond = al) { + emit_int32(cond << 28 | 1 << 24 | (fields|0xf) << 16 | (Rd->encoding() << 12)); + } + + + enum { + CPSR = 0x00, CPSR_c = 0x01, CPSR_x = 0x02, CPSR_xc = 0x03, + CPSR_s = 0x004, CPSR_sc = 0x05, CPSR_sx = 0x06, 
CPSR_sxc = 0x07, + CPSR_f = 0x08, CPSR_fc = 0x09, CPSR_fx = 0x0a, CPSR_fxc = 0x0b, + CPSR_fs = 0x0c, CPSR_fsc = 0x0d, CPSR_fsx = 0x0e, CPSR_fsxc = 0x0f, + SPSR = 0x40, SPSR_c = 0x41, SPSR_x = 0x42, SPSR_xc = 0x43, + SPSR_s = 0x44, SPSR_sc = 0x45, SPSR_sx = 0x46, SPSR_sxc = 0x47, + SPSR_f = 0x48, SPSR_fc = 0x49, SPSR_fx = 0x4a, SPSR_fxc = 0x4b, + SPSR_fs = 0x4c, SPSR_fsc = 0x4d, SPSR_fsx = 0x4e, SPSR_fsxc = 0x4f + }; + +#define F(mnemonic, opcode) \ + void mnemonic(Register rdlo, Register rdhi, Register rm, Register rs, \ + AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | rdhi->encoding() << 16 | \ + rdlo->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); \ + } \ + void mnemonic##s(Register rdlo, Register rdhi, Register rm, Register rs, \ + AsmCondition cond = al) { \ + emit_int32(cond << 28 | opcode << 21 | 1 << 20 | rdhi->encoding() << 16 | \ + rdlo->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); \ + } + + F(umull, 4) + F(umlal, 5) + F(smull, 6) + F(smlal, 7) +#undef F + + void mul(Register rd, Register rm, Register rs, AsmCondition cond = al) { + emit_int32(cond << 28 | rd->encoding() << 16 | + rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); + } + + void muls(Register rd, Register rm, Register rs, AsmCondition cond = al) { + emit_int32(cond << 28 | 1 << 20 | rd->encoding() << 16 | + rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); + } + + void mla(Register rd, Register rm, Register rs, Register rn, AsmCondition cond = al) { + emit_int32(cond << 28 | 1 << 21 | rd->encoding() << 16 | + rn->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); + } + + void mlas(Register rd, Register rm, Register rs, Register rn, AsmCondition cond = al) { + emit_int32(cond << 28 | 1 << 21 | 1 << 20 | rd->encoding() << 16 | + rn->encoding() << 12 | rs->encoding() << 8 | 0x9 << 4 | rm->encoding()); + } + + // Loads and stores + +#define F(mnemonic, l, b) \ + void mnemonic(Register rd, Address addr, 
AsmCondition cond = al) { \ + emit_int32(cond << 28 | 1 << 26 | b << 22 | l << 20 | \ + rd->encoding() << 12 | addr.encoding2()); \ + } + + F(ldr, 1, 0) + F(ldrb, 1, 1) + F(str, 0, 0) + F(strb, 0, 1) +#undef F + +#undef F + +#define F(mnemonic, l, sh, even) \ + void mnemonic(Register rd, Address addr, AsmCondition cond = al) { \ + assert(!even || (rd->encoding() & 1) == 0, "must be even"); \ + emit_int32(cond << 28 | l << 20 | rd->encoding() << 12 | \ + 1 << 7 | sh << 5 | 1 << 4 | addr.encoding3()); \ + } + + F(strh, 0, 1, false) + F(ldrh, 1, 1, false) + F(ldrsb, 1, 2, false) + F(ldrsh, 1, 3, false) + F(strd, 0, 3, true) + +#undef F + + void ldrd(Register rd, Address addr, AsmCondition cond = al) { + assert((rd->encoding() & 1) == 0, "must be even"); + assert(!addr.index()->is_valid() || + (addr.index()->encoding() != rd->encoding() && + addr.index()->encoding() != (rd->encoding()+1)), "encoding constraint"); + emit_int32(cond << 28 | rd->encoding() << 12 | 0xD /* 0b1101 */ << 4 | addr.encoding3()); + } + +#define F(mnemonic, l, pu) \ + void mnemonic(Register rn, RegisterSet reg_set, \ + AsmWriteback w = no_writeback, AsmCondition cond = al) { \ + assert(reg_set.encoding() != 0 && (w == no_writeback || \ + (reg_set.encoding() & (1 << rn->encoding())) == 0), \ + "unpredictable instruction"); \ + emit_int32(cond << 28 | 4 << 25 | pu << 23 | w << 21 | l << 20 | \ + rn->encoding() << 16 | reg_set.encoding()); \ + } + + F(ldmda, 1, 0) F(ldmfa, 1, 0) + F(ldmia, 1, 1) F(ldmfd, 1, 1) + F(ldmdb, 1, 2) F(ldmea, 1, 2) + F(ldmib, 1, 3) F(ldmed, 1, 3) + F(stmda, 0, 0) F(stmed, 0, 0) + F(stmia, 0, 1) F(stmea, 0, 1) + F(stmdb, 0, 2) F(stmfd, 0, 2) + F(stmib, 0, 3) F(stmfa, 0, 3) +#undef F + + void ldrex(Register rd, Address addr, AsmCondition cond = al) { + assert(rd != PC, "unpredictable instruction"); + emit_int32(cond << 28 | 0x19 << 20 | addr.encoding_ex() | + rd->encoding() << 12 | 0xf9f); + } + + void strex(Register rs, Register rd, Address addr, AsmCondition cond = al) { + 
assert(rd != PC && rs != PC && + rs != rd && rs != addr.base(), "unpredictable instruction"); + emit_int32(cond << 28 | 0x18 << 20 | addr.encoding_ex() | + rs->encoding() << 12 | 0xf90 | rd->encoding()); + } + + void ldrexd(Register rd, Address addr, AsmCondition cond = al) { + assert(rd != PC, "unpredictable instruction"); + emit_int32(cond << 28 | 0x1B << 20 | addr.encoding_ex() | + rd->encoding() << 12 | 0xf9f); + } + + void strexd(Register rs, Register rd, Address addr, AsmCondition cond = al) { + assert(rd != PC && rs != PC && + rs != rd && rs != addr.base(), "unpredictable instruction"); + emit_int32(cond << 28 | 0x1A << 20 | addr.encoding_ex() | + rs->encoding() << 12 | 0xf90 | rd->encoding()); + } + + void clrex() { + emit_int32(0xF << 28 | 0x57 << 20 | 0xFF << 12 | 0x01f); + } + + // Miscellaneous instructions + + void clz(Register rd, Register rm, AsmCondition cond = al) { + emit_int32(cond << 28 | 0x016f0f10 | rd->encoding() << 12 | rm->encoding()); + } + + void rev(Register rd, Register rm, AsmCondition cond = al) { + emit_int32(cond << 28 | 0x06bf0f30 | rd->encoding() << 12 | rm->encoding()); + } + + void rev16(Register rd, Register rm, AsmCondition cond = al) { + emit_int32(cond << 28 | 0x6bf0fb0 | rd->encoding() << 12 | rm->encoding()); + } + + void revsh(Register rd, Register rm, AsmCondition cond = al) { + emit_int32(cond << 28 | 0x6ff0fb0 | rd->encoding() << 12 | rm->encoding()); + } + + void rbit(Register rd, Register rm, AsmCondition cond = al) { + emit_int32(cond << 28 | 0x6ff0f30 | rd->encoding() << 12 | rm->encoding()); + } + + void pld(Address addr) { + emit_int32(0xf550f000 | addr.encoding2()); + } + + void pldw(Address addr) { + assert(VM_Version::arm_arch() >= 7 && os::is_MP(), "no pldw on this processor"); + emit_int32(0xf510f000 | addr.encoding2()); + } + + void svc(int imm_24, AsmCondition cond = al) { + assert((imm_24 >> 24) == 0, "encoding constraint"); + emit_int32(cond << 28 | 0xf << 24 | imm_24); + } + + void ubfx(Register rd, 
Register rn, unsigned int lsb, unsigned int width, AsmCondition cond = al) { + assert(VM_Version::arm_arch() >= 7, "no ubfx on this processor"); + assert(width > 0, "must be"); + assert(lsb < 32, "must be"); + emit_int32(cond << 28 | 0x3f << 21 | (width - 1) << 16 | rd->encoding() << 12 | + lsb << 7 | 0x5 << 4 | rn->encoding()); + } + + void uxtb(Register rd, Register rm, unsigned int rotation = 0, AsmCondition cond = al) { + assert(VM_Version::arm_arch() >= 7, "no uxtb on this processor"); + assert((rotation % 8) == 0 && (rotation <= 24), "encoding constraint"); + emit_int32(cond << 28 | 0x6e << 20 | 0xf << 16 | rd->encoding() << 12 | + (rotation >> 3) << 10 | 0x7 << 4 | rm->encoding()); + } + + // ARM Memory Barriers + // + // There are two types of memory barriers defined for the ARM processor + // DataSynchronizationBarrier and DataMemoryBarrier + // + // The Linux kernel uses the DataMemoryBarrier for all of it's + // memory barrier operations (smp_mb, smp_rmb, smp_wmb) + // + // There are two forms of each barrier instruction. + // The mcr forms are supported on armv5 and newer architectures + // + // The dmb, dsb instructions were added in armv7 + // architectures and are compatible with their mcr + // predecessors. 
+ // + // Here are the encodings for future reference: + // + // DataSynchronizationBarrier (dsb) + // on ARMv7 - emit_int32(0xF57FF04F) + // + // on ARMv5+ - mcr p15, 0, Rtmp, c7, c10, 4 on earlier processors + // emit_int32(0xe << 28 | 0xe << 24 | 0x7 << 16 | Rtmp->encoding() << 12 | + // 0xf << 8 | 0x9 << 4 | 0xa); + // + // DataMemoryBarrier (dmb) + // on ARMv7 - emit_int32(0xF57FF05F) + // + // on ARMv5+ - mcr p15, 0, Rtmp, c7, c10, 5 on earlier processors + // emit_int32(0xe << 28 | 0xe << 24 | 0x7 << 16 | Rtmp->encoding() << 12 | + // 0xf << 8 | 0xb << 4 | 0xa); + // + + enum DMB_Opt { + DMB_all = 0xf, + DMB_st = 0xe, + }; + + void dmb(DMB_Opt opt, Register reg) { + if (VM_Version::arm_arch() >= 7) { + emit_int32(0xF57FF050 | opt); + } else { + bool preserve_tmp = (reg == noreg); + if(preserve_tmp) { + reg = Rtemp; + str(reg, Address(SP, -wordSize, pre_indexed)); + } + mov(reg, 0); + // DataMemoryBarrier + emit_int32(0xe << 28 | + 0xe << 24 | + 0x7 << 16 | + reg->encoding() << 12 | + 0xf << 8 | + 0xb << 4 | + 0xa); + if(preserve_tmp) { + ldr(reg, Address(SP, wordSize, post_indexed)); + } + } + } + + void dsb(Register reg) { + if (VM_Version::arm_arch() >= 7) { + emit_int32(0xF57FF04F); + } else { + bool preserve_tmp = (reg == noreg); + if(preserve_tmp) { + reg = Rtemp; + str(reg, Address(SP, -wordSize, pre_indexed)); + } + mov(reg, 0); + // DataSynchronizationBarrier + emit_int32(0xe << 28 | + 0xe << 24 | + 0x7 << 16 | + reg->encoding() << 12 | + 0xf << 8 | + 0x9 << 4 | + 0xa); + if(preserve_tmp) { + ldr(reg, Address(SP, wordSize, post_indexed)); + } + } + } + + +#define F(mnemonic, b) \ + void mnemonic(Register rd, Register rm, Register rn, AsmCondition cond = al) { \ + assert(rn != rm && rn != rd, "unpredictable instruction"); \ + emit_int32(cond << 28 | 0x2 << 23 | b << 22 | rn->encoding() << 16 | \ + rd->encoding() << 12 | 9 << 4 | rm->encoding()); \ + } + + F(swp, 0) + F(swpb, 1) +#undef F + + // Branches + +#define F(mnemonic, l) \ + void 
mnemonic(Register rm, AsmCondition cond = al) { \ + emit_int32(cond << 28 | 0x012fff10 | l << 5 | rm->encoding()); \ + } + + F(bx, 0) + F(blx, 1) +#undef F + +#define F(mnemonic, l) \ + void mnemonic(address target, AsmCondition cond = al) { \ + unsigned int offset = (unsigned int)(target - pc() - 8); \ + assert((offset & 3) == 0, "bad alignment"); \ + assert((offset >> 25) == 0 || ((int)offset >> 25) == -1, "offset is too large"); \ + emit_int32(cond << 28 | l << 24 | offset << 6 >> 8); \ + } + + F(b, 0xa) + F(bl, 0xb) +#undef F + + // ARMv7 instructions + +#define F(mnemonic, wt) \ + void mnemonic(Register rd, int imm_16, AsmCondition cond = al) { \ + assert((imm_16 >> 16) == 0, "encoding constraint"); \ + emit_int32(cond << 28 | wt << 20 | rd->encoding() << 12 | \ + (imm_16 & 0xf000) << 4 | (imm_16 & 0xfff)); \ + } + + F(movw, 0x30) + F(movt, 0x34) +#undef F + + // VFP Support + +// Checks that VFP instructions are not used in SOFTFP mode. +#ifdef __SOFTFP__ +#define CHECK_VFP_PRESENT ShouldNotReachHere() +#else +#define CHECK_VFP_PRESENT +#endif // __SOFTFP__ + + static const int single_cp_num = 0xa00; + static const int double_cp_num = 0xb00; + + // Bits P, Q, R, S collectively form the opcode +#define F(mnemonic, P, Q, R, S) \ + void mnemonic##d(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fn->lo_bit() == 0 && fd->lo_bit() == 0 && fm->lo_bit() == 0, "single precision register?"); \ + emit_int32(cond << 28 | 0x7 << 25 | double_cp_num | \ + P << 23 | Q << 21 | R << 20 | S << 6 | \ + fn->hi_bits() << 16 | fn->hi_bit() << 7 | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + fm->hi_bits() | fm->hi_bit() << 5); \ + } \ + void mnemonic##s(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + AsmCondition cond = al) { \ + assert(fn->hi_bit() == 0 && fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \ + CHECK_VFP_PRESENT; \ + emit_int32(cond << 28 | 0x7 << 25 | 
single_cp_num | \ + P << 23 | Q << 21 | R << 20 | S << 6 | \ + fn->hi_bits() << 16 | fn->lo_bit() << 7 | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } + + F(fmac, 0, 0, 0, 0) // Fd = Fd + (Fn * Fm) + F(fnmac, 0, 0, 0, 1) // Fd = Fd - (Fn * Fm) + F(fmsc, 0, 0, 1, 0) // Fd = -Fd + (Fn * Fm) + F(fnmsc, 0, 0, 1, 1) // Fd = -Fd - (Fn * Fm) + + F(fmul, 0, 1, 0, 0) // Fd = Fn * Fm + F(fnmul, 0, 1, 0, 1) // Fd = -(Fn * Fm) + F(fadd, 0, 1, 1, 0) // Fd = Fn + Fm + F(fsub, 0, 1, 1, 1) // Fd = Fn - Fm + F(fdiv, 1, 0, 0, 0) // Fd = Fn / Fm +#undef F + + enum VElem_Size { + VELEM_SIZE_8 = 0x00, + VELEM_SIZE_16 = 0x01, + VELEM_SIZE_32 = 0x02, + VELEM_SIZE_64 = 0x03 + }; + + enum VLD_Type { + VLD1_TYPE_1_REG = 0x7 /* 0b0111 */, + VLD1_TYPE_2_REGS = 0xA /* 0b1010 */, + VLD1_TYPE_3_REGS = 0x6 /* 0b0110 */, + VLD1_TYPE_4_REGS = 0x2 /* 0b0010 */ + }; + + enum VFloat_Arith_Size { + VFA_SIZE_F32 = 0x0 /* 0b0 */, + }; + + // Bits P, Q, R, S collectively form the opcode +#define F(mnemonic, P, Q, R, S) \ + void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + int size, int quad) { \ + CHECK_VFP_PRESENT; \ + assert(VM_Version::has_simd(), "simd instruction"); \ + assert(fn->lo_bit() == 0 && fd->lo_bit() == 0 && fm->lo_bit() == 0, \ + "single precision register?"); \ + assert(!quad || ((fn->hi_bits() | fd->hi_bits() | fm->hi_bits()) & 1) == 0, \ + "quad precision register?"); \ + emit_int32(0xf << 28 | P << 23 | Q << 8 | R << 4 | \ + S << 21 | size << 20 | quad << 6 | \ + fn->hi_bits() << 16 | fn->hi_bit() << 7 | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + fm->hi_bits() | fm->hi_bit() << 5); \ + } + + F(vmulI, 0x4 /* 0b0100 */, 0x9 /* 0b1001 */, 1, 0) // Vd = Vn * Vm (int) + F(vaddI, 0x4 /* 0b0100 */, 0x8 /* 0b1000 */, 0, 0) // Vd = Vn + Vm (int) + F(vsubI, 0x6 /* 0b0110 */, 0x8 /* 0b1000 */, 0, 0) // Vd = Vn - Vm (int) + F(vaddF, 0x4 /* 0b0100 */, 0xD /* 0b1101 */, 0, 0) // Vd = Vn + Vm (float) + F(vsubF, 0x4 /* 0b0100 
*/, 0xD /* 0b1101 */, 0, 1) // Vd = Vn - Vm (float) + F(vmulF, 0x6 /* 0b0110 */, 0xD /* 0b1101 */, 1, 0) // Vd = Vn * Vm (float) + F(vshlSI, 0x4 /* 0b0100 */, 0x4 /* 0b0100 */, 0, 0) // Vd = ashift(Vm,Vn) (int) + F(vshlUI, 0x6 /* 0b0110 */, 0x4 /* 0b0100 */, 0, 0) // Vd = lshift(Vm,Vn) (int) + F(_vandI, 0x4 /* 0b0100 */, 0x1 /* 0b0001 */, 1, 0) // Vd = Vn & Vm (int) + F(_vorI, 0x4 /* 0b0100 */, 0x1 /* 0b0001 */, 1, 1) // Vd = Vn | Vm (int) + F(_vxorI, 0x6 /* 0b0110 */, 0x1 /* 0b0001 */, 1, 0) // Vd = Vn ^ Vm (int) +#undef F + + void vandI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) { + _vandI(fd, fn, fm, 0, quad); + } + void vorI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) { + _vorI(fd, fn, fm, 0, quad); + } + void vxorI(FloatRegister fd, FloatRegister fn, FloatRegister fm, int quad) { + _vxorI(fd, fn, fm, 0, quad); + } + + void vneg(FloatRegister fd, FloatRegister fm, int size, int flt, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(fd->lo_bit() == 0 && fm->lo_bit() == 0, + "single precision register?"); + assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0, + "quad precision register?"); + emit_int32(0xf << 28 | 0x3B /* 0b00111011 */ << 20 | 0x1 /* 0b01 */ << 16 | 0x7 /* 0b111 */ << 7 | + size << 18 | quad << 6 | flt << 10 | + fd->hi_bits() << 12 | fd->hi_bit() << 22 | + fm->hi_bits() << 0 | fm->hi_bit() << 5); + } + + void vnegI(FloatRegister fd, FloatRegister fm, int size, int quad) { + int flt = 0; + vneg(fd, fm, size, flt, quad); + } + + void vshli(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(fd->lo_bit() == 0 && fm->lo_bit() == 0, + "single precision register?"); + assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0, + "quad precision register?"); + + if (imm >= size) { + // maximum shift gives all zeroes, direction doesn't matter, + // but only 
available for shift right + vshri(fd, fm, size, size, true /* unsigned */, quad); + return; + } + assert(imm >= 0 && imm < size, "out of range"); + + int imm6 = 0; + int L = 0; + switch (size) { + case 8: + case 16: + case 32: + imm6 = size + imm ; + break; + case 64: + L = 1; + imm6 = imm ; + break; + default: + ShouldNotReachHere(); + } + emit_int32(0xf << 28 | 0x5 /* 0b00101 */ << 23 | 0x51 /* 0b01010001 */ << 4 | + imm6 << 16 | L << 7 | quad << 6 | + fd->hi_bits() << 12 | fd->hi_bit() << 22 | + fm->hi_bits() << 0 | fm->hi_bit() << 5); + } + + void vshri(FloatRegister fd, FloatRegister fm, int size, int imm, + bool U /* unsigned */, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(fd->lo_bit() == 0 && fm->lo_bit() == 0, + "single precision register?"); + assert(!quad || ((fd->hi_bits() | fm->hi_bits()) & 1) == 0, + "quad precision register?"); + assert(imm > 0, "out of range"); + if (imm >= size) { + // maximum shift (all zeroes) + imm = size; + } + int imm6 = 0; + int L = 0; + switch (size) { + case 8: + case 16: + case 32: + imm6 = 2 * size - imm ; + break; + case 64: + L = 1; + imm6 = 64 - imm ; + break; + default: + ShouldNotReachHere(); + } + emit_int32(0xf << 28 | 0x5 /* 0b00101 */ << 23 | 0x1 /* 0b00000001 */ << 4 | + imm6 << 16 | L << 7 | quad << 6 | U << 24 | + fd->hi_bits() << 12 | fd->hi_bit() << 22 | + fm->hi_bits() << 0 | fm->hi_bit() << 5); + } + void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { + vshri(fd, fm, size, imm, true /* unsigned */, quad); + } + void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { + vshri(fd, fm, size, imm, false /* signed */, quad); + } + + // Extension opcodes where P,Q,R,S = 1 opcode is in Fn +#define F(mnemonic, N, opcode) \ + void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->lo_bit() == 0 && fm->hi_bit() == 0, "incorrect register?"); \ + 
emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + double_cp_num | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } \ + void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + single_cp_num | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } + + F(fuito, 0, 0x8) // Unsigned integer to floating point conversion + F(fsito, 1, 0x8) // Signed integer to floating point conversion +#undef F + +#define F(mnemonic, N, opcode) \ + void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0 && fm->lo_bit() == 0, "incorrect register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + double_cp_num | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->hi_bit() << 5); \ + } \ + void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + single_cp_num | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } + + F(ftoui, 0, 0xc) // Float to unsigned int conversion + F(ftouiz, 1, 0xc) // Float to unsigned int conversion, RZ mode + F(ftosi, 0, 0xd) // Float to signed int conversion + F(ftosiz, 1, 0xd) // Float to signed int conversion, RZ mode +#undef F + +#define F(mnemonic, N, opcode) \ + void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0 && fm->lo_bit() == 0, "incorrect register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | 
N << 7 | 1 << 6 | \ + double_cp_num | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->hi_bit() << 5); \ + } \ + void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->lo_bit() == 0 && fm->hi_bit() == 0, "incorrect register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + single_cp_num | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } + + F(fcvtd, 1, 0x7) // Single->Double conversion + F(fcvts, 1, 0x7) // Double->Single conversion +#undef F + +#define F(mnemonic, N, opcode) \ + void mnemonic##d(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->lo_bit() == 0 && fm->lo_bit() == 0, "single precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + double_cp_num | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + fm->hi_bits() | fm->hi_bit() << 5); \ + } \ + void mnemonic##s(FloatRegister fd, FloatRegister fm, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0 && fm->hi_bit() == 0, "double precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + single_cp_num | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + fm->hi_bits() | fm->lo_bit() << 5); \ + } + + F(fcpy, 0, 0x0) // Fd = Fm + F(fabs, 1, 0x0) // Fd = abs(Fm) + F(fneg, 0, 0x1) // Fd = -Fm + F(fsqrt, 1, 0x1) // Fd = sqrt(Fm) + F(fcmp, 0, 0x4) // Compare Fd with Fm no exceptions on quiet NANs + F(fcmpe, 1, 0x4) // Compare Fd with Fm with exceptions on quiet NANs +#undef F + + // Opcodes with one operand only +#define F(mnemonic, N, opcode) \ + void mnemonic##d(FloatRegister fd, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->lo_bit() == 0, "single precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + double_cp_num | fd->hi_bits() << 
12 | fd->hi_bit() << 22); \ + } \ + void mnemonic##s(FloatRegister fd, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0, "double precision register?"); \ + emit_int32(cond << 28 | 0xeb << 20 | opcode << 16 | N << 7 | 1 << 6 | \ + single_cp_num | fd->hi_bits() << 12 | fd->lo_bit() << 22); \ + } + + F(fcmpz, 0, 0x5) // Compare Fd with 0, no exceptions quiet NANs + F(fcmpez, 1, 0x5) // Compare Fd with 0, with exceptions quiet NANs +#undef F + + // Float loads (L==1) and stores (L==0) +#define F(mnemonic, L) \ + void mnemonic##d(FloatRegister fd, Address addr, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->lo_bit() == 0, "single precision register?"); \ + emit_int32(cond << 28 | 0xd << 24 | L << 20 | \ + fd->hi_bits() << 12 | fd->hi_bit() << 22 | \ + double_cp_num | addr.encoding_vfp()); \ + } \ + void mnemonic##s(FloatRegister fd, Address addr, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(fd->hi_bit() == 0, "double precision register?"); \ + emit_int32(cond << 28 | 0xd << 24 | L << 20 | \ + fd->hi_bits() << 12 | fd->lo_bit() << 22 | \ + single_cp_num | addr.encoding_vfp()); \ + } + + F(fst, 0) // Store 1 register + F(fld, 1) // Load 1 register +#undef F + + // Float load and store multiple +#define F(mnemonic, l, pu) \ + void mnemonic##d(Register rn, FloatRegisterSet reg_set, \ + AsmWriteback w = no_writeback, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(w == no_writeback || rn != PC, "unpredictable instruction"); \ + assert(!(w == no_writeback && pu == 2), "encoding constraint"); \ + assert((reg_set.encoding_d() & 1) == 0, "encoding constraint"); \ + emit_int32(cond << 28 | 6 << 25 | pu << 23 | w << 21 | l << 20 | \ + rn->encoding() << 16 | reg_set.encoding_d() | double_cp_num); \ + } \ + void mnemonic##s(Register rn, FloatRegisterSet reg_set, \ + AsmWriteback w = no_writeback, AsmCondition cond = al) { \ + CHECK_VFP_PRESENT; \ + assert(w == no_writeback || rn != PC, "unpredictable 
instruction"); \ + assert(!(w == no_writeback && pu == 2), "encoding constraint"); \ + emit_int32(cond << 28 | 6 << 25 | pu << 23 | w << 21 | l << 20 | \ + rn->encoding() << 16 | reg_set.encoding_s() | single_cp_num); \ + } + + F(fldmia, 1, 1) F(fldmfd, 1, 1) + F(fldmdb, 1, 2) F(fldmea, 1, 2) + F(fstmia, 0, 1) F(fstmfd, 0, 1) + F(fstmdb, 0, 2) F(fstmea, 0, 2) +#undef F + + // fconst{s,d} encoding: + // 31 28 27 23 22 21 20 19 16 15 12 10 9 8 7 4 3 0 + // | cond | 11101 | D | 11 | imm4H | Vd | 101 | sz | 0000 | imm4L | + // sz = 0 for single precision, 1 otherwise + // Register number is Vd:D for single precision, D:Vd otherwise + // immediate value is imm4H:imm4L + + void fconsts(FloatRegister fd, unsigned char imm_8, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(fd->hi_bit() == 0, "double precision register?"); + emit_int32(cond << 28 | 0xeb << 20 | single_cp_num | + fd->hi_bits() << 12 | fd->lo_bit() << 22 | (imm_8 & 0xf) | (imm_8 >> 4) << 16); + } + + void fconstd(FloatRegister fd, unsigned char imm_8, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(fd->lo_bit() == 0, "double precision register?"); + emit_int32(cond << 28 | 0xeb << 20 | double_cp_num | + fd->hi_bits() << 12 | fd->hi_bit() << 22 | (imm_8 & 0xf) | (imm_8 >> 4) << 16); + } + + // GPR <-> FPR transfers + void fmsr(FloatRegister fd, Register rd, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(fd->hi_bit() == 0, "double precision register?"); + emit_int32(cond << 28 | 0xe0 << 20 | single_cp_num | 1 << 4 | + fd->hi_bits() << 16 | fd->lo_bit() << 7 | rd->encoding() << 12); + } + + void fmrs(Register rd, FloatRegister fd, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(fd->hi_bit() == 0, "double precision register?"); + emit_int32(cond << 28 | 0xe1 << 20 | single_cp_num | 1 << 4 | + fd->hi_bits() << 16 | fd->lo_bit() << 7 | rd->encoding() << 12); + } + + void fmdrr(FloatRegister fd, Register rd, Register rn, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + 
assert(fd->lo_bit() == 0, "single precision register?"); + emit_int32(cond << 28 | 0xc4 << 20 | double_cp_num | 1 << 4 | + fd->hi_bits() | fd->hi_bit() << 5 | + rn->encoding() << 16 | rd->encoding() << 12); + } + + void fmrrd(Register rd, Register rn, FloatRegister fd, AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(fd->lo_bit() == 0, "single precision register?"); + emit_int32(cond << 28 | 0xc5 << 20 | double_cp_num | 1 << 4 | + fd->hi_bits() | fd->hi_bit() << 5 | + rn->encoding() << 16 | rd->encoding() << 12); + } + + void fmstat(AsmCondition cond = al) { + CHECK_VFP_PRESENT; + emit_int32(cond << 28 | 0xef1fa10); + } + + void vmrs(Register rt, VFPSystemRegister sr, AsmCondition cond = al) { + assert((sr->encoding() & (~0xf)) == 0, "what system register is that?"); + emit_int32(cond << 28 | rt->encoding() << 12 | sr->encoding() << 16 | 0xef00a10); + } + + void vmsr(VFPSystemRegister sr, Register rt, AsmCondition cond = al) { + assert((sr->encoding() & (~0xf)) == 0, "what system register is that?"); + emit_int32(cond << 28 | rt->encoding() << 12 | sr->encoding() << 16 | 0xee00a10); + } + + void vcnt(FloatRegister Dd, FloatRegister Dm) { + CHECK_VFP_PRESENT; + // emitted at VM startup to detect whether the instruction is available + assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0 && Dm->lo_bit() == 0, "single precision registers?"); + emit_int32(0xf3b00500 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | Dm->hi_bit() << 5 | Dm->hi_bits()); + } + + void vpaddl(FloatRegister Dd, FloatRegister Dm, int size, bool s) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0 && Dm->lo_bit() == 0, "single precision registers?"); + assert(size == 8 || size == 16 || size == 32, "unexpected size"); + emit_int32(0xf3b00200 | Dd->hi_bit() << 22 | (size >> 4) << 18 | Dd->hi_bits() << 12 | (s ? 
0 : 1) << 7 | Dm->hi_bit() << 5 | Dm->hi_bits()); + } + + void vld1(FloatRegister Dd, Address addr, VElem_Size size, int bits) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision registers?"); + int align = 0; + assert(bits == 128, "code assumption"); + VLD_Type type = VLD1_TYPE_2_REGS; // 2x64 + emit_int32(0xf4200000 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | type << 8 | size << 6 | align << 4 | addr.encoding_simd()); + } + + void vst1(FloatRegister Dd, Address addr, VElem_Size size, int bits) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision registers?"); + int align = 0; + assert(bits == 128, "code assumption"); + VLD_Type type = VLD1_TYPE_2_REGS; // 2x64 + emit_int32(0xf4000000 | Dd->hi_bit() << 22 | Dd->hi_bits() << 12 | type << 8 | size << 6 | align << 4 | addr.encoding_simd()); + } + + void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision register?"); + assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?"); + assert(imm8 >= 0 && imm8 < 256, "out of range"); + int op; + int cmode; + switch (size) { + case VELEM_SIZE_8: + op = 0; + cmode = 0xE /* 0b1110 */; + break; + case VELEM_SIZE_16: + op = 0; + cmode = 0x8 /* 0b1000 */; + break; + case VELEM_SIZE_32: + op = 0; + cmode = 0x0 /* 0b0000 */; + break; + default: + ShouldNotReachHere(); + } + emit_int32(0xf << 28 | 0x1 << 25 | 0x1 << 23 | 0x1 << 4 | + (imm8 >> 7) << 24 | ((imm8 & 0x70) >> 4) << 16 | (imm8 & 0xf) | + quad << 6 | op << 5 | cmode << 8 | + Dd->hi_bits() << 12 | Dd->hi_bit() << 22); + } + + void vdupI(FloatRegister Dd, Register Rs, VElem_Size size, int quad, + AsmCondition cond = al) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision 
register?"); + assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?"); + int b; + int e; + switch (size) { + case VELEM_SIZE_8: + b = 1; + e = 0; + break; + case VELEM_SIZE_16: + b = 0; + e = 1; + break; + case VELEM_SIZE_32: + b = 0; + e = 0; + break; + default: + ShouldNotReachHere(); + } + emit_int32(cond << 28 | 0x1D /* 0b11101 */ << 23 | 0xB /* 0b1011 */ << 8 | 0x1 << 4 | + quad << 21 | b << 22 | e << 5 | Rs->encoding() << 12 | + Dd->hi_bits() << 16 | Dd->hi_bit() << 7); + } + + void vdup(FloatRegister Dd, FloatRegister Ds, int index, int size, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision register?"); + assert(Ds->lo_bit() == 0, "single precision register?"); + assert(!quad || (Dd->hi_bits() & 1) == 0, "quad precision register?"); + int range = 64 / size; + assert(index < range, "overflow"); + int imm4; + switch (size) { + case 8: + assert((index & 0x7 /* 0b111 */) == index, "overflow"); + imm4 = index << 1 | 0x1 /* 0b0001 */; + break; + case 16: + assert((index & 0x3 /* 0b11 */) == index, "overflow"); + imm4 = index << 2 | 0x2 /* 0b0010 */; + break; + case 32: + assert((index & 0x1 /* 0b1 */) == index, "overflow"); + imm4 = index << 3 | 0x4 /* 0b0100 */; + break; + default: + ShouldNotReachHere(); + } + emit_int32(0xF /* 0b1111 */ << 28 | 0x3B /* 0b00111011 */ << 20 | 0x6 /* 0b110 */ << 9 | + quad << 6 | imm4 << 16 | + Dd->hi_bits() << 12 | Dd->hi_bit() << 22 | + Ds->hi_bits() << 00 | Ds->hi_bit() << 5); + } + + void vdupF(FloatRegister Dd, FloatRegister Ss, int quad) { + int index = 0; + FloatRegister Ds = as_FloatRegister(Ss->encoding() & ~1); + if (Ss->lo_bit() != 0) { + /* odd S register */ + assert(Ds->successor() == Ss, "bad reg"); + index = 1; + } else { + /* even S register */ + assert(Ds == Ss, "bad reg"); + } + vdup(Dd, Ds, index, 32, quad); + } + + void vrev(FloatRegister Dd, FloatRegister Dm, int quad, int region_size, VElem_Size size) { + 
CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision register?"); + assert(Dm->lo_bit() == 0, "single precision register?"); + assert(!quad || ((Dd->hi_bits() | Dm->hi_bits()) & 1) == 0, + "quad precision register?"); + unsigned int op = 0; + switch (region_size) { + case 16: op = 0x2; /*0b10*/ break; + case 32: op = 0x1; /*0b01*/ break; + case 64: op = 0x0; /*0b00*/ break; + default: assert(false, "encoding constraint"); + } + emit_int32(0xf << 28 | 0x7 << 23 | Dd->hi_bit() << 22 | 0x3 << 20 | + size << 18 | Dd->hi_bits() << 12 | op << 7 | quad << 6 | Dm->hi_bit() << 5 | + Dm->hi_bits()); + } + + void veor(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, int quad) { + CHECK_VFP_PRESENT; + assert(VM_Version::has_simd(), "simd instruction"); + assert(Dd->lo_bit() == 0, "single precision register?"); + assert(Dm->lo_bit() == 0, "single precision register?"); + assert(Dn->lo_bit() == 0, "single precision register?"); + assert(!quad || ((Dd->hi_bits() | Dm->hi_bits() | Dn->hi_bits()) & 1) == 0, + "quad precision register?"); + + emit_int32(0xf << 28 | 0x3 << 24 | Dd->hi_bit() << 22 | Dn->hi_bits() << 16 | + Dd->hi_bits() << 12 | 0x1 << 8 | Dn->hi_bit() << 7 | quad << 6 | + Dm->hi_bit() << 5 | 0x1 << 4 | Dm->hi_bits()); + } + + + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} + +#ifdef COMPILER2 + typedef VFP::double_num double_num; + typedef VFP::float_num float_num; +#endif +}; + +#ifdef __SOFTFP__ +// Soft float function declarations +extern "C" { +extern float __aeabi_fadd(float, float); +extern float __aeabi_fmul(float, float); +extern float __aeabi_fsub(float, float); +extern float __aeabi_fdiv(float, float); + +extern double __aeabi_dadd(double, double); +extern double __aeabi_dmul(double, double); +extern double __aeabi_dsub(double, double); +extern double __aeabi_ddiv(double, double); + +extern double __aeabi_f2d(float); +extern float __aeabi_d2f(double); +extern float 
__aeabi_i2f(int); +extern double __aeabi_i2d(int); +extern int __aeabi_f2iz(float); + +extern int __aeabi_fcmpeq(float, float); +extern int __aeabi_fcmplt(float, float); +extern int __aeabi_fcmple(float, float); +extern int __aeabi_fcmpge(float, float); +extern int __aeabi_fcmpgt(float, float); + +extern int __aeabi_dcmpeq(double, double); +extern int __aeabi_dcmplt(double, double); +extern int __aeabi_dcmple(double, double); +extern int __aeabi_dcmpge(double, double); +extern int __aeabi_dcmpgt(double, double); + +// Imported code from glibc soft-fp bundle for +// calculation accuracy improvement. See CR 6757269. +extern double __aeabi_fadd_glibc(float, float); +extern double __aeabi_fsub_glibc(float, float); +extern double __aeabi_dadd_glibc(double, double); +extern double __aeabi_dsub_glibc(double, double); +}; +#endif // __SOFTFP__ + + +#endif // CPU_ARM_VM_ASSEMBLER_ARM_32_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/assembler_arm_64.cpp 2016-12-02 11:17:35.247097647 -0500 @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "ci/ciEnv.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/hashtable.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+// Returns whether given imm has equal bit fields <0:size-1> and <size:2*size-1>.
+inline bool Assembler::LogicalImmediate::has_equal_subpatterns(uintx imm, int size) {
+  uintx mask = right_n_bits(size);                   // presumably selects the low `size` bits - confirm against right_n_bits()
+  uintx subpattern1 = mask_bits(imm, mask);          // field taken from imm as it is
+  uintx subpattern2 = mask_bits(imm >> size, mask);  // field taken after shifting imm right by `size`
+  return subpattern1 == subpattern2;
+}
+
+// Returns least size that is a power of two from 2 to 64 with the proviso that given
+// imm is composed of repeating patterns of this size.
+inline int Assembler::LogicalImmediate::least_pattern_size(uintx imm) {
+  int size = BitsPerWord;                                       // start from the full word width
+  while (size > 2 && has_equal_subpatterns(imm, size >> 1)) {  // halve while the two half-size fields match; never goes below 2
+    size >>= 1;
+  }
+  return size;
+}
+
+// Returns count of set bits in given imm. Based on variable-precision SWAR algorithm.
+inline int Assembler::LogicalImmediate::population_count(uintx x) {
+  x -= ((x >> 1) & 0x5555555555555555L);                               // each 2-bit field now holds the number of bits that were set in it
+  x = (((x >> 2) & 0x3333333333333333L) + (x & 0x3333333333333333L));  // neighbouring 2-bit counts summed into 4-bit fields
+  x = (((x >> 4) + x) & 0x0f0f0f0f0f0f0f0fL);                          // neighbouring 4-bit counts summed; one count per byte
+  x += (x >> 8);                                                       // fold the per-byte counts together
+  x += (x >> 16);
+  x += (x >> 32);                                                      // assumes uintx is 64 bits wide, like the masks above
+  return(x & 0x7f);                                                    // the total is taken from the low 7 bits
+}
+
+// Let given x be <A:B> (A followed by B) where B = 0 and least bit of A = 1. Returns <A:C>, where C is B-size set bits.
+inline uintx Assembler::LogicalImmediate::set_least_zeroes(uintx x) {
+  return x | (x - 1);  // x - 1 turns the zero bits below the lowest set bit into ones
+}
+
+
+#ifdef ASSERT
+
+// Restores immediate by encoded bit masks.
+uintx Assembler::LogicalImmediate::decode() {
+  assert (_encoded, "should be");
+
+  int len_code = (_immN << 6) | ((~_imms) & 0x3f);  // 7-bit value: _immN on top of the inverted low 6 bits of _imms
+  assert (len_code != 0, "should be");
+
+  int len = 6;
+  while (!is_set_nth_bit(len_code, len)) len--;  // presumably stops at the highest set bit of len_code - confirm is_set_nth_bit()
+  int esize = 1 << len;                          // size in bits of the repeating element
+  assert (len > 0, "should be");
+  assert ((_is32bit ? 32 : 64) >= esize, "should be");
+
+  int levels = right_n_bits(len);  // presumably a mask of the low `len` bits - confirm against right_n_bits()
+  int S = _imms & levels;
+  int R = _immr & levels;
+
+  assert (S != levels, "should be");
+
+  uintx welem = right_n_bits(S + 1);                                         // presumably S + 1 low bits set
+  uintx wmask = (R == 0) ? welem : ((welem >> R) | (welem << (esize - R)));  // welem moved right by R; the bits shifted out re-enter from bit (esize - R)
+
+  for (int size = esize; size < 64; size <<= 1) {  // copy the pattern into the higher positions until 64 bits are covered
+    wmask |= (wmask << size);
+  }
+
+  return wmask;
+}
+
+#endif
+
+
+// Constructs LogicalImmediate by given imm. Figures out if given imm can be used in AArch64 logical
+// instructions (AND, ANDS, EOR, ORR) and saves its encoding.
+void Assembler::LogicalImmediate::construct(uintx imm, bool is32) {
+  _is32bit = is32;
+
+  if (is32) {
+    assert(((imm >> 32) == 0) || (((intx)imm >> 31) == -1), "32-bit immediate is out of range");  // upper half must be zero or a sign extension
+
+    // Replicate low 32 bits.
+    imm &= 0xffffffff;
+    imm |= imm << 32;
+  }
+
+  // All-zeroes and all-ones can not be encoded.
+  if (imm != 0 && (~imm != 0)) {
+
+    // Let LPS (least pattern size) be the least size (power of two from 2 to 64) of repeating
+    // patterns in the immediate. If immediate value can be encoded, it is encoded by pattern
+    // of exactly LPS size (due to structure of valid patterns). In order to verify
+    // that immediate value can be encoded, LPS is calculated and bits of immediate
+    // are verified to be valid pattern.
+    int lps = least_pattern_size(imm);
+    uintx lps_mask = right_n_bits(lps);
+
+    // A valid pattern has one of the following forms:
+    //  | 0 x A | 1 x B | 0 x C |, where B > 0 and C > 0, or
+    //  | 1 x A | 0 x B | 1 x C |, where B > 0 and C > 0.
+    // For simplicity, the second form of the pattern is inverted into the first form.
+    bool inverted = imm & 0x1;                           // lowest bit set means the second form
+    uintx pattern = (inverted ? ~imm : imm) & lps_mask;  // one element of the (possibly inverted) immediate
+
+    //  | 0 x A | 1 x (B + C) |
+    uintx without_least_zeroes = set_least_zeroes(pattern);
+
+    // Pattern is valid iff without least zeroes it is a power of two - 1.
+    if ((without_least_zeroes & (without_least_zeroes + 1)) == 0) {
+
+      // Count B as population count of pattern.
+      int bits_count = population_count(pattern);
+
+      // Count B+C as population count of pattern without least zeroes
+      int left_range = population_count(without_least_zeroes);
+
+      // S-prefix is a part of imms field which encodes LPS.
+      //  LPS  |  S prefix
+      //   64  |  not defined
+      //   32  |  0b0
+      //   16  |  0b10
+      //    8  |  0b110
+      //    4  |  0b1110
+      //    2  |  0b11110
+      int s_prefix = (lps == 64) ? 0 : ~set_least_zeroes(lps) & 0x3f;
+
+      // immN bit is set iff LPS == 64.
+      _immN = (lps == 64) ? 1 : 0;
+      assert (!is32 || (_immN == 0), "32-bit immediate should be encoded with zero N-bit");
+
+      // immr is the rotation size.
+      _immr = lps + (inverted ? 0 : bits_count) - left_range;
+
+      // imms is the field that encodes bits count and S-prefix.
+      _imms = ((inverted ? (lps - bits_count) : bits_count) - 1) | s_prefix;
+
+      _encoded = true;
+      assert (decode() == imm, "illegal encoding");  // round-trip check; decode() is only defined under ASSERT
+
+      return;
+    }
+  }
+
+  _encoded = false;  // reached for all-zeroes, all-ones, or a pattern that failed the validity test
+}
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/assembler_arm_64.hpp 2016-12-02 11:17:41.103429759 -0500
@@ -0,0 +1,1717 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
+#define CPU_ARM_VM_ASSEMBLER_ARM_64_HPP
+
+enum AsmShift12 {
+  lsl0, lsl12  // ArithmeticImmediate records lsl12 when it stores the value as x >> 12, lsl0 when it stores x unchanged
+};
+
+enum AsmPrefetchOp {
+  pldl1keep = 0b00000,  // pld* group: consecutive values starting at 0b00000
+  pldl1strm,
+  pldl2keep,
+  pldl2strm,
+  pldl3keep,
+  pldl3strm,
+
+  plil1keep = 0b01000,  // pli* group: consecutive values starting at 0b01000
+  plil1strm,
+  plil2keep,
+  plil2strm,
+  plil3keep,
+  plil3strm,
+
+  pstl1keep = 0b10000,  // pst* group: consecutive values starting at 0b10000
+  pstl1strm,
+  pstl2keep,
+  pstl2strm,
+  pstl3keep,
+  pstl3strm,
+};
+
+// Shifted register operand for data processing instructions.
+class AsmOperand VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _reg;    // register the operand refers to
+  AsmShift _shift;  // kind of shift applied to _reg
+  int _shift_imm;   // shift amount; the three-argument constructor asserts it is non-negative
+
+ public:
+  AsmOperand(Register reg) {  // plain register operand: recorded as lsl with a zero shift amount
+    assert(reg != SP, "SP is not allowed in shifted register operand");
+    _reg = reg;
+    _shift = lsl;
+    _shift_imm = 0;
+  }
+
+  AsmOperand(Register reg, AsmShift shift, int shift_imm) {  // register with an explicit shift kind and amount
+    assert(reg != SP, "SP is not allowed in shifted register operand");
+    assert(shift_imm >= 0, "shift amount should be non-negative");
+    _reg = reg;
+    _shift = shift;
+    _shift_imm = shift_imm;
+  }
+
+  Register reg() const {  // accessors return the stored fields unchanged
+    return _reg;
+  }
+
+  AsmShift shift() const {
+    return _shift;
+  }
+
+  int shift_imm() const {
+    return _shift_imm;
+  }
+};
+
+
+class Assembler : public AbstractAssembler {
+
+ public:
+
+  static const int LogInstructionSize = 2;
+  static const int InstructionSize    = 1 << LogInstructionSize;  // 4, derived from LogInstructionSize
+
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
+
+  static inline AsmCondition inverse(AsmCondition cond) {  // returns the condition whose code differs from cond in the lowest bit
+    assert ((cond != al) && (cond != nv), "AL and NV conditions cannot be inversed");
+    return (AsmCondition)((int)cond ^ 1);  // assumes opposite conditions differ only in that bit - confirm against AsmCondition
+  }
+
+  // Returns value of nzcv flags conforming to the given condition.
+  static inline int flags_for_condition(AsmCondition cond) {
+    switch(cond) {            // NZCV
+      case mi: case lt: return 0b1000;
+      case eq: case le: return 0b0100;
+      case hs: case hi: return 0b0010;
+      case vs:          return 0b0001;
+      default:          return 0b0000;  // every condition not listed above gets all four flags clear
+    }
+  }
+
+  // Immediate, encoded into logical instructions.
+  class LogicalImmediate {
+   private:
+    bool _encoded;  // set by construct(): true when an encoding was found
+    bool _is32bit;  // copy of the is32 argument given to construct()
+    int _immN;      // encoding fields; the public getters assert _encoded before returning them
+    int _immr;
+    int _imms;
+
+    static inline bool has_equal_subpatterns(uintx imm, int size);
+    static inline int least_pattern_size(uintx imm);
+    static inline int population_count(uintx x);
+    static inline uintx set_least_zeroes(uintx x);
+
+#ifdef ASSERT
+    uintx decode();  // declared only when ASSERT is defined
+#endif
+
+    void construct(uintx imm, bool is32);
+
+   public:
+    LogicalImmediate(uintx imm, bool is32 = false) { construct(imm, is32); }  // all the work is delegated to construct()
+
+    // Returns true if given immediate can be used in AArch64 logical instruction.
+    bool is_encoded() const { return _encoded; }
+
+    bool is32bit() const { return _is32bit; }
+    int immN() const { assert(_encoded, "should be"); return _immN; }
+    int immr() const { assert(_encoded, "should be"); return _immr; }
+    int imms() const { assert(_encoded, "should be"); return _imms; }
+  };
+
+  // Immediate, encoded into arithmetic add/sub instructions.
+  class ArithmeticImmediate {
+   private:
+    bool _encoded;      // true when x fitted one of the accepted forms
+    int _imm;           // stored immediate; left unset when _encoded is false
+    AsmShift12 _shift;  // lsl0 or lsl12; left unset when _encoded is false
+
+   public:
+    ArithmeticImmediate(intx x) {
+      if (is_unsigned_imm_in_range(x, 12, 0)) {  // first choice: x itself fits in 12 unsigned bits
+        _encoded = true;
+        _imm = x;
+        _shift = lsl0;
+      } else if (is_unsigned_imm_in_range(x, 12, 12)) {  // second choice: low 12 bits are zero and x >> 12 fits in 12 bits
+        _encoded = true;
+        _imm = x >> 12;
+        _shift = lsl12;
+      } else {
+        _encoded = false;
+      }
+    }
+
+    ArithmeticImmediate(intx x, AsmShift12 sh) {  // caller chooses the shift; x is stored as given and must fit in 12 unsigned bits
+      if (is_unsigned_imm_in_range(x, 12, 0)) {
+        _encoded = true;
+        _imm = x;
+        _shift = sh;
+      } else {
+        _encoded = false;
+      }
+    }
+
+    // Returns true if this immediate can be used in AArch64 arithmetic (add/sub/cmp/cmn) instructions.
+    bool is_encoded() const { return _encoded; }
+
+    int imm() const { assert(_encoded, "should be"); return _imm; }
+    AsmShift12 shift() const { assert(_encoded, "should be"); return _shift; }
+  };
+
+  static inline bool is_imm_in_range(intx value, int bits, int align_bits) {  // signed range check: low align_bits bits must be zero
+    intx sign_bits = (value >> (bits + align_bits - 1));  // what remains above the value field; must be 0 or -1 below
+    return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1));
+  }
+
+  static inline int encode_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) {
+    assert (is_imm_in_range(value, bits, align_bits), "immediate value is out of range");
+    return ((value >> align_bits) & right_n_bits(bits)) << low_bit_in_encoding;  // drop the alignment bits, keep `bits` bits, move to the field position
+  }
+
+  static inline bool is_unsigned_imm_in_range(intx value, int bits, int align_bits) {  // non-negative, low align_bits bits zero, nothing set above the field
+    return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0);
+  }
+
+  static inline int encode_unsigned_imm(intx value, int bits, int align_bits, int low_bit_in_encoding) {
+    assert (is_unsigned_imm_in_range(value, bits, align_bits), "immediate value is out of range");
+    return (value >> align_bits) << low_bit_in_encoding;  // no masking here; the assert above is the only range guard
+  }
+
+  static inline bool is_offset_in_range(intx offset, int bits) {
+    assert (bits == 14 || bits == 19 || bits == 26, "wrong bits number");
+    return is_imm_in_range(offset, bits, 2);  // align_bits == 2: the low two bits of offset must be zero
+  }
+
+  static inline int encode_offset(intx offset, int bits, int low_bit_in_encoding) {
+    return encode_imm(offset, bits, 2, low_bit_in_encoding);  // same 2-bit alignment as is_offset_in_range
+  }
+
+  // Returns true if given value can be used as immediate in arithmetic (add/sub/cmp/cmn) instructions.
+ static inline bool is_arith_imm_in_range(intx value) { + return ArithmeticImmediate(value).is_encoded(); + } + + + // Load/store instructions + +#define F(mnemonic, opc) \ + void mnemonic(Register rd, address literal_addr) { \ + intx offset = literal_addr - pc(); \ + assert (opc != 0b01 || offset == 0 || ((uintx)literal_addr & 7) == 0, "ldr target should be aligned"); \ + assert (is_offset_in_range(offset, 19), "offset is out of range"); \ + emit_int32(opc << 30 | 0b011 << 27 | encode_offset(offset, 19, 5) | rd->encoding_with_zr()); \ + } + + F(ldr_w, 0b00) + F(ldr, 0b01) + F(ldrsw, 0b10) +#undef F + +#define F(mnemonic, opc) \ + void mnemonic(FloatRegister rt, address literal_addr) { \ + intx offset = literal_addr - pc(); \ + assert (offset == 0 || ((uintx)literal_addr & right_n_bits(2 + opc)) == 0, "ldr target should be aligned"); \ + assert (is_offset_in_range(offset, 19), "offset is out of range"); \ + emit_int32(opc << 30 | 0b011100 << 24 | encode_offset(offset, 19, 5) | rt->encoding()); \ + } + + F(ldr_s, 0b00) + F(ldr_d, 0b01) + F(ldr_q, 0b10) +#undef F + +#define F(mnemonic, size, o2, L, o1, o0) \ + void mnemonic(Register rt, Register rn) { \ + emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \ + o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(ldxrb, 0b00, 0, 1, 0, 0) + F(ldaxrb, 0b00, 0, 1, 0, 1) + F(ldarb, 0b00, 1, 1, 0, 1) + F(ldxrh, 0b01, 0, 1, 0, 0) + F(ldaxrh, 0b01, 0, 1, 0, 1) + F(ldarh, 0b01, 1, 1, 0, 1) + F(ldxr_w, 0b10, 0, 1, 0, 0) + F(ldaxr_w, 0b10, 0, 1, 0, 1) + F(ldar_w, 0b10, 1, 1, 0, 1) + F(ldxr, 0b11, 0, 1, 0, 0) + F(ldaxr, 0b11, 0, 1, 0, 1) + F(ldar, 0b11, 1, 1, 0, 1) + + F(stlrb, 0b00, 1, 0, 0, 1) + F(stlrh, 0b01, 1, 0, 0, 1) + F(stlr_w, 0b10, 1, 0, 0, 1) + F(stlr, 0b11, 1, 0, 0, 1) +#undef F + +#define F(mnemonic, size, o2, L, o1, o0) \ + void mnemonic(Register rs, Register rt, Register rn) { \ + assert (rs != rt, "should be different"); \ + assert (rs 
!= rn, "should be different"); \ + emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \ + o0 << 15 | 0b11111 << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(stxrb, 0b00, 0, 0, 0, 0) + F(stlxrb, 0b00, 0, 0, 0, 1) + F(stxrh, 0b01, 0, 0, 0, 0) + F(stlxrh, 0b01, 0, 0, 0, 1) + F(stxr_w, 0b10, 0, 0, 0, 0) + F(stlxr_w, 0b10, 0, 0, 0, 1) + F(stxr, 0b11, 0, 0, 0, 0) + F(stlxr, 0b11, 0, 0, 0, 1) +#undef F + +#define F(mnemonic, size, o2, L, o1, o0) \ + void mnemonic(Register rt, Register rt2, Register rn) { \ + assert (rt != rt2, "should be different"); \ + emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | 0b11111 << 16 | \ + o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(ldxp_w, 0b10, 0, 1, 1, 0) + F(ldaxp_w, 0b10, 0, 1, 1, 1) + F(ldxp, 0b11, 0, 1, 1, 0) + F(ldaxp, 0b11, 0, 1, 1, 1) +#undef F + +#define F(mnemonic, size, o2, L, o1, o0) \ + void mnemonic(Register rs, Register rt, Register rt2, Register rn) { \ + assert (rs != rt, "should be different"); \ + assert (rs != rt2, "should be different"); \ + assert (rs != rn, "should be different"); \ + emit_int32(size << 30 | 0b001000 << 24 | o2 << 23 | L << 22 | o1 << 21 | rs->encoding_with_zr() << 16 | \ + o0 << 15 | rt2->encoding_with_zr() << 10 | rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(stxp_w, 0b10, 0, 0, 1, 0) + F(stlxp_w, 0b10, 0, 0, 1, 1) + F(stxp, 0b11, 0, 0, 1, 0) + F(stlxp, 0b11, 0, 0, 1, 1) +#undef F + +#define F(mnemonic, opc, V, L) \ + void mnemonic(Register rt, Register rt2, Register rn, int offset = 0) { \ + assert (!L || rt != rt2, "should be different"); \ + int align_bits = 2 + (opc >> 1); \ + assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \ + emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \ + rt2->encoding_with_zr() << 10 | 
rn->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(stnp_w, 0b00, 0, 0) + F(ldnp_w, 0b00, 0, 1) + F(stnp, 0b10, 0, 0) + F(ldnp, 0b10, 0, 1) +#undef F + +#define F(mnemonic, opc, V, L) \ + void mnemonic(FloatRegister rt, FloatRegister rt2, Register rn, int offset = 0) { \ + assert (!L || (rt != rt2), "should be different"); \ + int align_bits = 2 + opc; \ + assert (is_imm_in_range(offset, 7, align_bits), "offset is out of range"); \ + emit_int32(opc << 30 | 0b101 << 27 | V << 26 | L << 22 | encode_imm(offset, 7, align_bits, 15) | \ + rt2->encoding() << 10 | rn->encoding_with_sp() << 5 | rt->encoding()); \ + } + + F(stnp_s, 0b00, 1, 0) + F(stnp_d, 0b01, 1, 0) + F(stnp_q, 0b10, 1, 0) + F(ldnp_s, 0b00, 1, 1) + F(ldnp_d, 0b01, 1, 1) + F(ldnp_q, 0b10, 1, 1) +#undef F + +#define F(mnemonic, size, V, opc) \ + void mnemonic(Register rt, Address addr) { \ + assert((addr.mode() == basic_offset) || (rt != addr.base()), "should be different"); \ + if (addr.index() == noreg) { \ + if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, size)) { \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ + encode_unsigned_imm(addr.disp(), 12, size, 10) | \ + addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } else { \ + assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ + addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } \ + } else { \ + assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \ + assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ + addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ + 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | 
rt->encoding_with_zr()); \ + } \ + } + + F(strb, 0b00, 0, 0b00) + F(ldrb, 0b00, 0, 0b01) + F(ldrsb, 0b00, 0, 0b10) + F(ldrsb_w, 0b00, 0, 0b11) + + F(strh, 0b01, 0, 0b00) + F(ldrh, 0b01, 0, 0b01) + F(ldrsh, 0b01, 0, 0b10) + F(ldrsh_w, 0b01, 0, 0b11) + + F(str_w, 0b10, 0, 0b00) + F(ldr_w, 0b10, 0, 0b01) + F(ldrsw, 0b10, 0, 0b10) + + F(str, 0b11, 0, 0b00) + F(ldr, 0b11, 0, 0b01) +#undef F + +#define F(mnemonic, size, V, opc) \ + void mnemonic(AsmPrefetchOp prfop, Address addr) { \ + assert (addr.mode() == basic_offset, #mnemonic " supports only basic_offset address mode"); \ + if (addr.index() == noreg) { \ + if (is_unsigned_imm_in_range(addr.disp(), 12, size)) { \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ + encode_unsigned_imm(addr.disp(), 12, size, 10) | \ + addr.base()->encoding_with_sp() << 5 | prfop); \ + } else { \ + assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ + addr.base()->encoding_with_sp() << 5 | prfop); \ + } \ + } else { \ + assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \ + assert ((addr.shift_imm() == 0) || (addr.shift_imm() == size), "invalid shift amount"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ + addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ + 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | prfop); \ + } \ + } + + F(prfm, 0b11, 0, 0b10) +#undef F + +#define F(mnemonic, size, V, opc) \ + void mnemonic(FloatRegister rt, Address addr) { \ + int align_bits = (((opc & 0b10) >> 1) << 2) | size; \ + if (addr.index() == noreg) { \ + if ((addr.mode() == basic_offset) && is_unsigned_imm_in_range(addr.disp(), 12, align_bits)) { \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | 0b01 << 24 | opc << 22 | \ + encode_unsigned_imm(addr.disp(), 12, align_bits, 10) | \ + 
addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ + } else { \ + assert(is_imm_in_range(addr.disp(), 9, 0), "offset is out of range"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | encode_imm(addr.disp(), 9, 0, 12) | \ + addr.mode() << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ + } \ + } else { \ + assert (addr.disp() == 0, "non-zero displacement for [reg + reg] address mode"); \ + assert ((addr.shift_imm() == 0) || (addr.shift_imm() == align_bits), "invalid shift amount"); \ + emit_int32(size << 30 | 0b111 << 27 | V << 26 | opc << 22 | 1 << 21 | \ + addr.index()->encoding_with_zr() << 16 | addr.extend() << 13 | (addr.shift_imm() != 0) << 12 | \ + 0b10 << 10 | addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ + } \ + } + + F(str_b, 0b00, 1, 0b00) + F(ldr_b, 0b00, 1, 0b01) + F(str_h, 0b01, 1, 0b00) + F(ldr_h, 0b01, 1, 0b01) + F(str_s, 0b10, 1, 0b00) + F(ldr_s, 0b10, 1, 0b01) + F(str_d, 0b11, 1, 0b00) + F(ldr_d, 0b11, 1, 0b01) + F(str_q, 0b00, 1, 0b10) + F(ldr_q, 0b00, 1, 0b11) +#undef F + +#define F(mnemonic, opc, V, L) \ + void mnemonic(Register rt, Register rt2, Address addr) { \ + assert((addr.mode() == basic_offset) || ((rt != addr.base()) && (rt2 != addr.base())), "should be different"); \ + assert(!L || (rt != rt2), "should be different"); \ + assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \ + int align_bits = 2 + (opc >> 1); \ + int mode_encoding = (addr.mode() == basic_offset) ? 
0b10 : addr.mode(); \ + assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \ + emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \ + encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding_with_zr() << 10 | \ + addr.base()->encoding_with_sp() << 5 | rt->encoding_with_zr()); \ + } + + F(stp_w, 0b00, 0, 0) + F(ldp_w, 0b00, 0, 1) + F(ldpsw, 0b01, 0, 1) + F(stp, 0b10, 0, 0) + F(ldp, 0b10, 0, 1) +#undef F + +#define F(mnemonic, opc, V, L) \ + void mnemonic(FloatRegister rt, FloatRegister rt2, Address addr) { \ + assert(!L || (rt != rt2), "should be different"); \ + assert(addr.index() == noreg, "[reg + reg] address mode is not available for load/store pair"); \ + int align_bits = 2 + opc; \ + int mode_encoding = (addr.mode() == basic_offset) ? 0b10 : addr.mode(); \ + assert(is_imm_in_range(addr.disp(), 7, align_bits), "offset is out of range"); \ + emit_int32(opc << 30 | 0b101 << 27 | V << 26 | mode_encoding << 23 | L << 22 | \ + encode_imm(addr.disp(), 7, align_bits, 15) | rt2->encoding() << 10 | \ + addr.base()->encoding_with_sp() << 5 | rt->encoding()); \ + } + + F(stp_s, 0b00, 1, 0) + F(ldp_s, 0b00, 1, 1) + F(stp_d, 0b01, 1, 0) + F(ldp_d, 0b01, 1, 1) + F(stp_q, 0b10, 1, 0) + F(ldp_q, 0b10, 1, 1) +#undef F + + // Data processing instructions + +#define F(mnemonic, sf, opc) \ + void mnemonic(Register rd, Register rn, const LogicalImmediate& imm) { \ + assert (imm.is_encoded(), "illegal immediate for logical instruction"); \ + assert (imm.is32bit() == (sf == 0), "immediate size does not match instruction size"); \ + emit_int32(sf << 31 | opc << 29 | 0b100100 << 23 | imm.immN() << 22 | imm.immr() << 16 | \ + imm.imms() << 10 | rn->encoding_with_zr() << 5 | \ + ((opc == 0b11) ? 
rd->encoding_with_zr() : rd->encoding_with_sp())); \ + } \ + void mnemonic(Register rd, Register rn, uintx imm) { \ + LogicalImmediate limm(imm, (sf == 0)); \ + mnemonic(rd, rn, limm); \ + } \ + void mnemonic(Register rd, Register rn, unsigned int imm) { \ + mnemonic(rd, rn, (uintx)imm); \ + } + + F(andr_w, 0, 0b00) + F(orr_w, 0, 0b01) + F(eor_w, 0, 0b10) + F(ands_w, 0, 0b11) + + F(andr, 1, 0b00) + F(orr, 1, 0b01) + F(eor, 1, 0b10) + F(ands, 1, 0b11) +#undef F + + void tst(Register rn, unsigned int imm) { + ands(ZR, rn, imm); + } + + void tst_w(Register rn, unsigned int imm) { + ands_w(ZR, rn, imm); + } + +#define F(mnemonic, sf, opc, N) \ + void mnemonic(Register rd, Register rn, AsmOperand operand) { \ + assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); \ + emit_int32(sf << 31 | opc << 29 | 0b01010 << 24 | operand.shift() << 22 | N << 21 | \ + operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \ + rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(andr_w, 0, 0b00, 0) + F(bic_w, 0, 0b00, 1) + F(orr_w, 0, 0b01, 0) + F(orn_w, 0, 0b01, 1) + F(eor_w, 0, 0b10, 0) + F(eon_w, 0, 0b10, 1) + F(ands_w, 0, 0b11, 0) + F(bics_w, 0, 0b11, 1) + + F(andr, 1, 0b00, 0) + F(bic, 1, 0b00, 1) + F(orr, 1, 0b01, 0) + F(orn, 1, 0b01, 1) + F(eor, 1, 0b10, 0) + F(eon, 1, 0b10, 1) + F(ands, 1, 0b11, 0) + F(bics, 1, 0b11, 1) +#undef F + + void tst(Register rn, AsmOperand operand) { + ands(ZR, rn, operand); + } + + void tst_w(Register rn, AsmOperand operand) { + ands_w(ZR, rn, operand); + } + + void mvn(Register rd, AsmOperand operand) { + orn(rd, ZR, operand); + } + + void mvn_w(Register rd, AsmOperand operand) { + orn_w(rd, ZR, operand); + } + +#define F(mnemonic, sf, op, S) \ + void mnemonic(Register rd, Register rn, const ArithmeticImmediate& imm) { \ + assert(imm.is_encoded(), "immediate is out of range"); \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b10001 << 24 | imm.shift() << 22 | \ + imm.imm() << 10 | 
rn->encoding_with_sp() << 5 | \ + (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp())); \ + } \ + void mnemonic(Register rd, Register rn, int imm) { \ + mnemonic(rd, rn, ArithmeticImmediate(imm)); \ + } \ + void mnemonic(Register rd, Register rn, int imm, AsmShift12 shift) { \ + mnemonic(rd, rn, ArithmeticImmediate(imm, shift)); \ + } \ + void mnemonic(Register rd, Register rn, Register rm, AsmExtendOp extend, int shift_imm = 0) { \ + assert ((0 <= shift_imm) && (shift_imm <= 4), "shift amount is out of range"); \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011001 << 21 | rm->encoding_with_zr() << 16 | \ + extend << 13 | shift_imm << 10 | rn->encoding_with_sp() << 5 | \ + (S == 1 ? rd->encoding_with_zr() : rd->encoding_with_sp())); \ + } \ + void mnemonic(Register rd, Register rn, AsmOperand operand) { \ + assert (operand.shift() != ror, "illegal shift type"); \ + assert (operand.shift_imm() >> (5 + sf) == 0, "shift amount is too large"); \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b01011 << 24 | operand.shift() << 22 | \ + operand.reg()->encoding_with_zr() << 16 | operand.shift_imm() << 10 | \ + rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(add_w, 0, 0, 0) + F(adds_w, 0, 0, 1) + F(sub_w, 0, 1, 0) + F(subs_w, 0, 1, 1) + + F(add, 1, 0, 0) + F(adds, 1, 0, 1) + F(sub, 1, 1, 0) + F(subs, 1, 1, 1) +#undef F + + void mov(Register rd, Register rm) { + if ((rd == SP) || (rm == SP)) { + add(rd, rm, 0); + } else { + orr(rd, ZR, rm); + } + } + + void mov_w(Register rd, Register rm) { + if ((rd == SP) || (rm == SP)) { + add_w(rd, rm, 0); + } else { + orr_w(rd, ZR, rm); + } + } + + void cmp(Register rn, int imm) { + subs(ZR, rn, imm); + } + + void cmp_w(Register rn, int imm) { + subs_w(ZR, rn, imm); + } + + void cmp(Register rn, Register rm) { + assert (rm != SP, "SP should not be used as the 2nd operand of cmp"); + if (rn == SP) { + subs(ZR, rn, rm, ex_uxtx); + } else { + subs(ZR, rn, rm); + } + } + + void cmp_w(Register rn, Register rm) { 
+ assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp"); + subs_w(ZR, rn, rm); + } + + void cmp(Register rn, AsmOperand operand) { + assert (rn != SP, "SP is not allowed in cmp with shifted register (AsmOperand)"); + subs(ZR, rn, operand); + } + + void cmn(Register rn, int imm) { + adds(ZR, rn, imm); + } + + void cmn_w(Register rn, int imm) { + adds_w(ZR, rn, imm); + } + + void cmn(Register rn, Register rm) { + assert (rm != SP, "SP should not be used as the 2nd operand of cmp"); + if (rn == SP) { + adds(ZR, rn, rm, ex_uxtx); + } else { + adds(ZR, rn, rm); + } + } + + void cmn_w(Register rn, Register rm) { + assert ((rn != SP) && (rm != SP), "SP should not be used in 32-bit cmp"); + adds_w(ZR, rn, rm); + } + + void neg(Register rd, Register rm) { + sub(rd, ZR, rm); + } + + void neg_w(Register rd, Register rm) { + sub_w(rd, ZR, rm); + } + +#define F(mnemonic, sf, op, S) \ + void mnemonic(Register rd, Register rn, Register rm) { \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010000 << 21 | rm->encoding_with_zr() << 16 | \ + rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(adc_w, 0, 0, 0) + F(adcs_w, 0, 0, 1) + F(sbc_w, 0, 1, 0) + F(sbcs_w, 0, 1, 1) + + F(adc, 1, 0, 0) + F(adcs, 1, 0, 1) + F(sbc, 1, 1, 0) + F(sbcs, 1, 1, 1) +#undef F + +#define F(mnemonic, sf, N) \ + void mnemonic(Register rd, Register rn, Register rm, int lsb) { \ + assert ((lsb >> (5 + sf)) == 0, "illegal least significant bit position"); \ + emit_int32(sf << 31 | 0b100111 << 23 | N << 22 | rm->encoding_with_zr() << 16 | \ + lsb << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(extr_w, 0, 0) + F(extr, 1, 1) +#undef F + +#define F(mnemonic, sf, opc) \ + void mnemonic(Register rd, int imm, int shift) { \ + assert ((imm >> 16) == 0, "immediate is out of range"); \ + assert (((shift & 0xf) == 0) && ((shift >> (5 + sf)) == 0), "invalid shift"); \ + emit_int32(sf << 31 | opc << 29 | 0b100101 << 23 | (shift >> 4) << 21 | \ + imm << 5 | 
rd->encoding_with_zr()); \ + } + + F(movn_w, 0, 0b00) + F(movz_w, 0, 0b10) + F(movk_w, 0, 0b11) + F(movn, 1, 0b00) + F(movz, 1, 0b10) + F(movk, 1, 0b11) +#undef F + + void mov(Register rd, int imm) { + assert ((imm >> 16) == 0, "immediate is out of range"); + movz(rd, imm, 0); + } + + void mov_w(Register rd, int imm) { + assert ((imm >> 16) == 0, "immediate is out of range"); + movz_w(rd, imm, 0); + } + +#define F(mnemonic, sf, op, S) \ + void mnemonic(Register rn, int imm, int nzcv, AsmCondition cond) { \ + assert ((imm >> 5) == 0, "immediate is out of range"); \ + assert ((nzcv >> 4) == 0, "illegal nzcv"); \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | imm << 16 | \ + cond << 12 | 1 << 11 | rn->encoding_with_zr() << 5 | nzcv); \ + } + + F(ccmn_w, 0, 0, 1) + F(ccmp_w, 0, 1, 1) + F(ccmn, 1, 0, 1) + F(ccmp, 1, 1, 1) +#undef F + +#define F(mnemonic, sf, op, S) \ + void mnemonic(Register rn, Register rm, int nzcv, AsmCondition cond) { \ + assert ((nzcv >> 4) == 0, "illegal nzcv"); \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010010 << 21 | rm->encoding_with_zr() << 16 | \ + cond << 12 | rn->encoding_with_zr() << 5 | nzcv); \ + } + + F(ccmn_w, 0, 0, 1) + F(ccmp_w, 0, 1, 1) + F(ccmn, 1, 0, 1) + F(ccmp, 1, 1, 1) +#undef F + +#define F(mnemonic, sf, op, S, op2) \ + void mnemonic(Register rd, Register rn, Register rm, AsmCondition cond) { \ + emit_int32(sf << 31 | op << 30 | S << 29 | 0b11010100 << 21 | rm->encoding_with_zr() << 16 | \ + cond << 12 | op2 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(csel_w, 0, 0, 0, 0b00) + F(csinc_w, 0, 0, 0, 0b01) + F(csinv_w, 0, 1, 0, 0b00) + F(csneg_w, 0, 1, 0, 0b01) + + F(csel, 1, 0, 0, 0b00) + F(csinc, 1, 0, 0, 0b01) + F(csinv, 1, 1, 0, 0b00) + F(csneg, 1, 1, 0, 0b01) +#undef F + + void cset(Register rd, AsmCondition cond) { + csinc(rd, ZR, ZR, inverse(cond)); + } + + void cset_w(Register rd, AsmCondition cond) { + csinc_w(rd, ZR, ZR, inverse(cond)); + } + + void csetm(Register 
rd, AsmCondition cond) { + csinv(rd, ZR, ZR, inverse(cond)); + } + + void csetm_w(Register rd, AsmCondition cond) { + csinv_w(rd, ZR, ZR, inverse(cond)); + } + + void cinc(Register rd, Register rn, AsmCondition cond) { + csinc(rd, rn, rn, inverse(cond)); + } + + void cinc_w(Register rd, Register rn, AsmCondition cond) { + csinc_w(rd, rn, rn, inverse(cond)); + } + + void cinv(Register rd, Register rn, AsmCondition cond) { + csinv(rd, rn, rn, inverse(cond)); + } + + void cinv_w(Register rd, Register rn, AsmCondition cond) { + csinv_w(rd, rn, rn, inverse(cond)); + } + +#define F(mnemonic, sf, S, opcode) \ + void mnemonic(Register rd, Register rn) { \ + emit_int32(sf << 31 | 1 << 30 | S << 29 | 0b11010110 << 21 | opcode << 10 | \ + rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(rbit_w, 0, 0, 0b000000) + F(rev16_w, 0, 0, 0b000001) + F(rev_w, 0, 0, 0b000010) + F(clz_w, 0, 0, 0b000100) + F(cls_w, 0, 0, 0b000101) + + F(rbit, 1, 0, 0b000000) + F(rev16, 1, 0, 0b000001) + F(rev32, 1, 0, 0b000010) + F(rev, 1, 0, 0b000011) + F(clz, 1, 0, 0b000100) + F(cls, 1, 0, 0b000101) +#undef F + +#define F(mnemonic, sf, S, opcode) \ + void mnemonic(Register rd, Register rn, Register rm) { \ + emit_int32(sf << 31 | S << 29 | 0b11010110 << 21 | rm->encoding_with_zr() << 16 | \ + opcode << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(udiv_w, 0, 0, 0b000010) + F(sdiv_w, 0, 0, 0b000011) + F(lslv_w, 0, 0, 0b001000) + F(lsrv_w, 0, 0, 0b001001) + F(asrv_w, 0, 0, 0b001010) + F(rorv_w, 0, 0, 0b001011) + + F(udiv, 1, 0, 0b000010) + F(sdiv, 1, 0, 0b000011) + F(lslv, 1, 0, 0b001000) + F(lsrv, 1, 0, 0b001001) + F(asrv, 1, 0, 0b001010) + F(rorv, 1, 0, 0b001011) +#undef F + +#define F(mnemonic, sf, op31, o0) \ + void mnemonic(Register rd, Register rn, Register rm, Register ra) { \ + emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \ + o0 << 15 | ra->encoding_with_zr() << 10 | rn->encoding_with_zr() << 5 | 
rd->encoding_with_zr()); \ + } + + F(madd_w, 0, 0b000, 0) + F(msub_w, 0, 0b000, 1) + F(madd, 1, 0b000, 0) + F(msub, 1, 0b000, 1) + + F(smaddl, 1, 0b001, 0) + F(smsubl, 1, 0b001, 1) + F(umaddl, 1, 0b101, 0) + F(umsubl, 1, 0b101, 1) +#undef F + + void mul(Register rd, Register rn, Register rm) { + madd(rd, rn, rm, ZR); + } + + void mul_w(Register rd, Register rn, Register rm) { + madd_w(rd, rn, rm, ZR); + } + +#define F(mnemonic, sf, op31, o0) \ + void mnemonic(Register rd, Register rn, Register rm) { \ + emit_int32(sf << 31 | 0b11011 << 24 | op31 << 21 | rm->encoding_with_zr() << 16 | \ + o0 << 15 | 0b11111 << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(smulh, 1, 0b010, 0) + F(umulh, 1, 0b110, 0) +#undef F + +#define F(mnemonic, op) \ + void mnemonic(Register rd, address addr) { \ + intx offset; \ + if (op == 0) { \ + offset = addr - pc(); \ + } else { \ + offset = (((intx)addr) - (((intx)pc()) & ~0xfff)) >> 12; \ + } \ + assert (is_imm_in_range(offset, 21, 0), "offset is out of range"); \ + emit_int32(op << 31 | (offset & 3) << 29 | 0b10000 << 24 | \ + encode_imm(offset >> 2, 19, 0, 5) | rd->encoding_with_zr()); \ + } \ + + F(adr, 0) + F(adrp, 1) +#undef F + + void adr(Register rd, Label& L) { + adr(rd, target(L)); + } + +#define F(mnemonic, sf, opc, N) \ + void mnemonic(Register rd, Register rn, int immr, int imms) { \ + assert ((immr >> (5 + sf)) == 0, "immr is out of range"); \ + assert ((imms >> (5 + sf)) == 0, "imms is out of range"); \ + emit_int32(sf << 31 | opc << 29 | 0b100110 << 23 | N << 22 | immr << 16 | \ + imms << 10 | rn->encoding_with_zr() << 5 | rd->encoding_with_zr()); \ + } + + F(sbfm_w, 0, 0b00, 0) + F(bfm_w, 0, 0b01, 0) + F(ubfm_w, 0, 0b10, 0) + + F(sbfm, 1, 0b00, 1) + F(bfm, 1, 0b01, 1) + F(ubfm, 1, 0b10, 1) +#undef F + +#define F(alias, mnemonic, sf, immr, imms) \ + void alias(Register rd, Register rn, int lsb, int width) { \ + assert ((lsb >> (5 + sf)) == 0, "lsb is out of range"); \ + assert ((1 <= width) && 
(width <= (32 << sf) - lsb), "width is out of range"); \ + mnemonic(rd, rn, immr, imms); \ + } + + F(bfi_w, bfm_w, 0, (-lsb) & 0x1f, width - 1) + F(bfi, bfm, 1, (-lsb) & 0x3f, width - 1) + F(bfxil_w, bfm_w, 0, lsb, lsb + width - 1) + F(bfxil, bfm, 1, lsb, lsb + width - 1) + F(sbfiz_w, sbfm_w, 0, (-lsb) & 0x1f, width - 1) + F(sbfiz, sbfm, 1, (-lsb) & 0x3f, width - 1) + F(sbfx_w, sbfm_w, 0, lsb, lsb + width - 1) + F(sbfx, sbfm, 1, lsb, lsb + width - 1) + F(ubfiz_w, ubfm_w, 0, (-lsb) & 0x1f, width - 1) + F(ubfiz, ubfm, 1, (-lsb) & 0x3f, width - 1) + F(ubfx_w, ubfm_w, 0, lsb, lsb + width - 1) + F(ubfx, ubfm, 1, lsb, lsb + width - 1) +#undef F + +#define F(alias, mnemonic, sf, immr, imms) \ + void alias(Register rd, Register rn, int shift) { \ + assert ((shift >> (5 + sf)) == 0, "shift is out of range"); \ + mnemonic(rd, rn, immr, imms); \ + } + + F(_asr_w, sbfm_w, 0, shift, 31) + F(_asr, sbfm, 1, shift, 63) + F(_lsl_w, ubfm_w, 0, (-shift) & 0x1f, 31 - shift) + F(_lsl, ubfm, 1, (-shift) & 0x3f, 63 - shift) + F(_lsr_w, ubfm_w, 0, shift, 31) + F(_lsr, ubfm, 1, shift, 63) +#undef F + +#define F(alias, mnemonic, immr, imms) \ + void alias(Register rd, Register rn) { \ + mnemonic(rd, rn, immr, imms); \ + } + + F(sxtb_w, sbfm_w, 0, 7) + F(sxtb, sbfm, 0, 7) + F(sxth_w, sbfm_w, 0, 15) + F(sxth, sbfm, 0, 15) + F(sxtw, sbfm, 0, 31) + F(uxtb_w, ubfm_w, 0, 7) + F(uxtb, ubfm, 0, 7) + F(uxth_w, ubfm_w, 0, 15) + F(uxth, ubfm, 0, 15) +#undef F + + // Branch instructions + +#define F(mnemonic, op) \ + void mnemonic(Register rn) { \ + emit_int32(0b1101011 << 25 | op << 21 | 0b11111 << 16 | rn->encoding_with_zr() << 5); \ + } + + F(br, 0b00) + F(blr, 0b01) + F(ret, 0b10) +#undef F + + void ret() { + ret(LR); + } + +#define F(mnemonic, op) \ + void mnemonic(address target) { \ + intx offset = target - pc(); \ + assert (is_offset_in_range(offset, 26), "offset is out of range"); \ + emit_int32(op << 31 | 0b00101 << 26 | encode_offset(offset, 26, 0)); \ + } + + F(b, 0) + F(bl, 1) +#undef F + 
+ void b(address target, AsmCondition cond) { + if (cond == al) { + b(target); + } else { + intx offset = target - pc(); + assert (is_offset_in_range(offset, 19), "offset is out of range"); + emit_int32(0b0101010 << 25 | encode_offset(offset, 19, 5) | cond); + } + } + + +#define F(mnemonic, sf, op) \ + void mnemonic(Register rt, address target) { \ + intx offset = target - pc(); \ + assert (is_offset_in_range(offset, 19), "offset is out of range"); \ + emit_int32(sf << 31 | 0b011010 << 25 | op << 24 | encode_offset(offset, 19, 5) | rt->encoding_with_zr()); \ + } \ + + F(cbz_w, 0, 0) + F(cbnz_w, 0, 1) + F(cbz, 1, 0) + F(cbnz, 1, 1) +#undef F + +#define F(mnemonic, op) \ + void mnemonic(Register rt, int bit, address target) { \ + intx offset = target - pc(); \ + assert (is_offset_in_range(offset, 14), "offset is out of range"); \ + assert (0 <= bit && bit < 64, "bit number is out of range"); \ + emit_int32((bit >> 5) << 31 | 0b011011 << 25 | op << 24 | (bit & 0x1f) << 19 | \ + encode_offset(offset, 14, 5) | rt->encoding_with_zr()); \ + } \ + + F(tbz, 0) + F(tbnz, 1) +#undef F + + // System instructions + + enum DMB_Opt { + DMB_ld = 0b1101, + DMB_st = 0b1110, + DMB_all = 0b1111 + }; + +#define F(mnemonic, L, op0, op1, CRn, op2, Rt) \ + void mnemonic(DMB_Opt option) { \ + emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \ + CRn << 12 | option << 8 | op2 << 5 | Rt); \ + } + + F(dsb, 0, 0b00, 0b011, 0b0011, 0b100, 0b11111) + F(dmb, 0, 0b00, 0b011, 0b0011, 0b101, 0b11111) +#undef F + +#define F(mnemonic, L, op0, op1, CRn, Rt) \ + void mnemonic(int imm) { \ + assert ((imm >> 7) == 0, "immediate is out of range"); \ + emit_int32(0b1101010100 << 22 | L << 21 | op0 << 19 | op1 << 16 | \ + CRn << 12 | imm << 5 | Rt); \ + } + + F(hint, 0, 0b00, 0b011, 0b0010, 0b11111) +#undef F + + void nop() { + hint(0); + } + + void yield() { + hint(1); + } + +#define F(mnemonic, opc, op2, LL) \ + void mnemonic(int imm = 0) { \ + assert ((imm >> 16) == 0, "immediate is out of 
range"); \ + emit_int32(0b11010100 << 24 | opc << 21 | imm << 5 | op2 << 2 | LL); \ + } + + F(brk, 0b001, 0b000, 0b00) + F(hlt, 0b010, 0b000, 0b00) +#undef F + + enum SystemRegister { // o0<1> op1<3> CRn<4> CRm<4> op2<3> + SysReg_NZCV = 0b101101000010000, + SysReg_FPCR = 0b101101000100000, + }; + + void mrs(Register rt, SystemRegister systemReg) { + assert ((systemReg >> 15) == 0, "systemReg is out of range"); + emit_int32(0b110101010011 << 20 | systemReg << 5 | rt->encoding_with_zr()); + } + + void msr(SystemRegister systemReg, Register rt) { + assert ((systemReg >> 15) == 0, "systemReg is out of range"); + emit_int32(0b110101010001 << 20 | systemReg << 5 | rt->encoding_with_zr()); + } + + // Floating-point instructions + +#define F(mnemonic, M, S, type, opcode2) \ + void mnemonic(FloatRegister rn, FloatRegister rm) { \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rm->encoding() << 16 | 0b1000 << 10 | rn->encoding() << 5 | opcode2); \ + } + + F(fcmp_s, 0, 0, 0b00, 0b00000) // FCMP Sn, Sm + F(fcmpe_s, 0, 0, 0b00, 0b10000) // FCMPE Sn, Sm (0b01000 is the compare-with-#0.0 form) + F(fcmp_d, 0, 0, 0b01, 0b00000) // FCMP Dn, Dm + F(fcmpe_d, 0, 0, 0b01, 0b10000) // FCMPE Dn, Dm +#undef F + +#define F(mnemonic, M, S, type, opcode2) \ + void mnemonic(FloatRegister rn) { \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + 0b1000 << 10 | rn->encoding() << 5 | opcode2); \ + } + + F(fcmp0_s, 0, 0, 0b00, 0b01000) + F(fcmpe0_s, 0, 0, 0b00, 0b11000) + F(fcmp0_d, 0, 0, 0b01, 0b01000) + F(fcmpe0_d, 0, 0, 0b01, 0b11000) +#undef F + +#define F(mnemonic, M, S, type, op) \ + void mnemonic(FloatRegister rn, FloatRegister rm, int nzcv, AsmCondition cond) { \ + assert ((nzcv >> 4) == 0, "illegal nzcv"); \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rm->encoding() << 16 | cond << 12 | 0b01 << 10 | rn->encoding() << 5 | op << 4 | nzcv); \ + } + + F(fccmp_s, 0, 0, 0b00, 0) + F(fccmpe_s, 0, 0, 0b00, 1) + F(fccmp_d, 0, 0, 0b01, 0) + F(fccmpe_d, 0, 0, 0b01, 1) +#undef F + +#define F(mnemonic, M, S, type) \ 
+ void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, AsmCondition cond) { \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rm->encoding() << 16 | cond << 12 | 0b11 << 10 | rn->encoding() << 5 | rd->encoding()); \ + } + + F(fcsel_s, 0, 0, 0b00) + F(fcsel_d, 0, 0, 0b01) +#undef F + +#define F(mnemonic, M, S, type, opcode) \ + void mnemonic(FloatRegister rd, FloatRegister rn) { \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + opcode << 15 | 0b10000 << 10 | rn->encoding() << 5 | rd->encoding()); \ + } + + F(fmov_s, 0, 0, 0b00, 0b000000) + F(fabs_s, 0, 0, 0b00, 0b000001) + F(fneg_s, 0, 0, 0b00, 0b000010) + F(fsqrt_s, 0, 0, 0b00, 0b000011) + F(fcvt_ds, 0, 0, 0b00, 0b000101) + F(fcvt_hs, 0, 0, 0b00, 0b000111) + F(frintn_s, 0, 0, 0b00, 0b001000) + F(frintp_s, 0, 0, 0b00, 0b001001) + F(frintm_s, 0, 0, 0b00, 0b001010) + F(frintz_s, 0, 0, 0b00, 0b001011) + F(frinta_s, 0, 0, 0b00, 0b001100) + F(frintx_s, 0, 0, 0b00, 0b001110) + F(frinti_s, 0, 0, 0b00, 0b001111) + + F(fmov_d, 0, 0, 0b01, 0b000000) + F(fabs_d, 0, 0, 0b01, 0b000001) + F(fneg_d, 0, 0, 0b01, 0b000010) + F(fsqrt_d, 0, 0, 0b01, 0b000011) + F(fcvt_sd, 0, 0, 0b01, 0b000100) + F(fcvt_hd, 0, 0, 0b01, 0b000111) + F(frintn_d, 0, 0, 0b01, 0b001000) + F(frintp_d, 0, 0, 0b01, 0b001001) + F(frintm_d, 0, 0, 0b01, 0b001010) + F(frintz_d, 0, 0, 0b01, 0b001011) + F(frinta_d, 0, 0, 0b01, 0b001100) + F(frintx_d, 0, 0, 0b01, 0b001110) + F(frinti_d, 0, 0, 0b01, 0b001111) + + F(fcvt_sh, 0, 0, 0b11, 0b000100) + F(fcvt_dh, 0, 0, 0b11, 0b000101) +#undef F + +#define F(mnemonic, M, S, type, opcode) \ + void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm) { \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rm->encoding() << 16 | opcode << 12 | 0b10 << 10 | rn->encoding() << 5 | rd->encoding()); \ + } + + F(fmul_s, 0, 0, 0b00, 0b0000) + F(fdiv_s, 0, 0, 0b00, 0b0001) + F(fadd_s, 0, 0, 0b00, 0b0010) + F(fsub_s, 0, 0, 0b00, 
0b0011) + F(fmax_s, 0, 0, 0b00, 0b0100) + F(fmin_s, 0, 0, 0b00, 0b0101) + F(fmaxnm_s, 0, 0, 0b00, 0b0110) + F(fminnm_s, 0, 0, 0b00, 0b0111) + F(fnmul_s, 0, 0, 0b00, 0b1000) + + F(fmul_d, 0, 0, 0b01, 0b0000) + F(fdiv_d, 0, 0, 0b01, 0b0001) + F(fadd_d, 0, 0, 0b01, 0b0010) + F(fsub_d, 0, 0, 0b01, 0b0011) + F(fmax_d, 0, 0, 0b01, 0b0100) + F(fmin_d, 0, 0, 0b01, 0b0101) + F(fmaxnm_d, 0, 0, 0b01, 0b0110) + F(fminnm_d, 0, 0, 0b01, 0b0111) + F(fnmul_d, 0, 0, 0b01, 0b1000) +#undef F + +#define F(mnemonic, M, S, type, o1, o0) \ + void mnemonic(FloatRegister rd, FloatRegister rn, FloatRegister rm, FloatRegister ra) { \ + emit_int32(M << 31 | S << 29 | 0b11111 << 24 | type << 22 | o1 << 21 | rm->encoding() << 16 | \ + o0 << 15 | ra->encoding() << 10 | rn->encoding() << 5 | rd->encoding()); \ + } + + F(fmadd_s, 0, 0, 0b00, 0, 0) + F(fmsub_s, 0, 0, 0b00, 0, 1) + F(fnmadd_s, 0, 0, 0b00, 1, 0) + F(fnmsub_s, 0, 0, 0b00, 1, 1) + + F(fmadd_d, 0, 0, 0b01, 0, 0) + F(fmsub_d, 0, 0, 0b01, 0, 1) + F(fnmadd_d, 0, 0, 0b01, 1, 0) + F(fnmsub_d, 0, 0, 0b01, 1, 1) +#undef F + +#define F(mnemonic, M, S, type) \ + void mnemonic(FloatRegister rd, int imm8) { \ + assert ((imm8 >> 8) == 0, "immediate is out of range"); \ + emit_int32(M << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + imm8 << 13 | 0b100 << 10 | rd->encoding()); \ + } + + F(fmov_s, 0, 0, 0b00) + F(fmov_d, 0, 0, 0b01) +#undef F + +#define F(mnemonic, sf, S, type, rmode, opcode) \ + void mnemonic(Register rd, FloatRegister rn) { \ + emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rmode << 19 | opcode << 16 | rn->encoding() << 5 | rd->encoding_with_zr()); \ + } + + F(fcvtns_ws, 0, 0, 0b00, 0b00, 0b000) + F(fcvtnu_ws, 0, 0, 0b00, 0b00, 0b001) + F(fcvtas_ws, 0, 0, 0b00, 0b00, 0b100) + F(fcvtau_ws, 0, 0, 0b00, 0b00, 0b101) + F(fmov_ws, 0, 0, 0b00, 0b00, 0b110) + F(fcvtps_ws, 0, 0, 0b00, 0b01, 0b000) + F(fcvtpu_ws, 0, 0, 0b00, 0b01, 0b001) + F(fcvtms_ws, 0, 0, 0b00, 0b10, 0b000) + F(fcvtmu_ws, 0, 0, 
0b00, 0b10, 0b001) + F(fcvtzs_ws, 0, 0, 0b00, 0b11, 0b000) + F(fcvtzu_ws, 0, 0, 0b00, 0b11, 0b001) + + F(fcvtns_wd, 0, 0, 0b01, 0b00, 0b000) + F(fcvtnu_wd, 0, 0, 0b01, 0b00, 0b001) + F(fcvtas_wd, 0, 0, 0b01, 0b00, 0b100) + F(fcvtau_wd, 0, 0, 0b01, 0b00, 0b101) + F(fcvtps_wd, 0, 0, 0b01, 0b01, 0b000) + F(fcvtpu_wd, 0, 0, 0b01, 0b01, 0b001) + F(fcvtms_wd, 0, 0, 0b01, 0b10, 0b000) + F(fcvtmu_wd, 0, 0, 0b01, 0b10, 0b001) + F(fcvtzs_wd, 0, 0, 0b01, 0b11, 0b000) + F(fcvtzu_wd, 0, 0, 0b01, 0b11, 0b001) + + F(fcvtns_xs, 1, 0, 0b00, 0b00, 0b000) + F(fcvtnu_xs, 1, 0, 0b00, 0b00, 0b001) + F(fcvtas_xs, 1, 0, 0b00, 0b00, 0b100) + F(fcvtau_xs, 1, 0, 0b00, 0b00, 0b101) + F(fcvtps_xs, 1, 0, 0b00, 0b01, 0b000) + F(fcvtpu_xs, 1, 0, 0b00, 0b01, 0b001) + F(fcvtms_xs, 1, 0, 0b00, 0b10, 0b000) + F(fcvtmu_xs, 1, 0, 0b00, 0b10, 0b001) + F(fcvtzs_xs, 1, 0, 0b00, 0b11, 0b000) + F(fcvtzu_xs, 1, 0, 0b00, 0b11, 0b001) + + F(fcvtns_xd, 1, 0, 0b01, 0b00, 0b000) + F(fcvtnu_xd, 1, 0, 0b01, 0b00, 0b001) + F(fcvtas_xd, 1, 0, 0b01, 0b00, 0b100) + F(fcvtau_xd, 1, 0, 0b01, 0b00, 0b101) + F(fmov_xd, 1, 0, 0b01, 0b00, 0b110) + F(fcvtps_xd, 1, 0, 0b01, 0b01, 0b000) + F(fcvtpu_xd, 1, 0, 0b01, 0b01, 0b001) + F(fcvtms_xd, 1, 0, 0b01, 0b10, 0b000) + F(fcvtmu_xd, 1, 0, 0b01, 0b10, 0b001) + F(fcvtzs_xd, 1, 0, 0b01, 0b11, 0b000) + F(fcvtzu_xd, 1, 0, 0b01, 0b11, 0b001) + + F(fmov_xq, 1, 0, 0b10, 0b01, 0b110) +#undef F + +#define F(mnemonic, sf, S, type, rmode, opcode) \ + void mnemonic(FloatRegister rd, Register rn) { \ + emit_int32(sf << 31 | S << 29 | 0b11110 << 24 | type << 22 | 1 << 21 | \ + rmode << 19 | opcode << 16 | rn->encoding_with_zr() << 5 | rd->encoding()); \ + } + + F(scvtf_sw, 0, 0, 0b00, 0b00, 0b010) + F(ucvtf_sw, 0, 0, 0b00, 0b00, 0b011) + F(fmov_sw, 0, 0, 0b00, 0b00, 0b111) + F(scvtf_dw, 0, 0, 0b01, 0b00, 0b010) + F(ucvtf_dw, 0, 0, 0b01, 0b00, 0b011) + + F(scvtf_sx, 1, 0, 0b00, 0b00, 0b010) + F(ucvtf_sx, 1, 0, 0b00, 0b00, 0b011) + F(scvtf_dx, 1, 0, 0b01, 0b00, 0b010) + F(ucvtf_dx, 1, 0, 0b01, 
0b00, 0b011) + F(fmov_dx, 1, 0, 0b01, 0b00, 0b111) + + F(fmov_qx, 1, 0, 0b10, 0b01, 0b111) +#undef F + +#define F(mnemonic, opcode) \ + void mnemonic(FloatRegister Vd, FloatRegister Vn) { \ + emit_int32( opcode << 10 | Vn->encoding() << 5 | Vd->encoding()); \ + } + + F(aese, 0b0100111000101000010010); + F(aesd, 0b0100111000101000010110); + F(aesmc, 0b0100111000101000011010); + F(aesimc, 0b0100111000101000011110); +#undef F + +#ifdef COMPILER2 + typedef VFP::double_num double_num; + typedef VFP::float_num float_num; +#endif + + void vcnt(FloatRegister Dd, FloatRegister Dn, int quad = 0, int size = 0) { + // emitted at VM startup to detect whether the instruction is available + assert(!VM_Version::is_initialized() || VM_Version::has_simd(), "simd instruction"); + assert(size == 0, "illegal size value"); + emit_int32(0x0e205800 | quad << 30 | size << 22 | Dn->encoding() << 5 | Dd->encoding()); + } + +#ifdef COMPILER2 + void addv(FloatRegister Dd, FloatRegister Dm, int quad, int size) { + // emitted at VM startup to detect whether the instruction is available + assert(VM_Version::has_simd(), "simd instruction"); + assert((quad & ~1) == 0, "illegal value"); + assert(size >= 0 && size < 3, "illegal value"); + assert(((size << 1) | quad) != 4, "illegal values (size 2, quad 0)"); + emit_int32(0x0e31b800 | quad << 30 | size << 22 | Dm->encoding() << 5 | Dd->encoding()); + } + + enum VElem_Size { + VELEM_SIZE_8 = 0x00, + VELEM_SIZE_16 = 0x01, + VELEM_SIZE_32 = 0x02, + VELEM_SIZE_64 = 0x03 + }; + + enum VLD_Type { + VLD1_TYPE_1_REG = 0b0111, + VLD1_TYPE_2_REGS = 0b1010, + VLD1_TYPE_3_REGS = 0b0110, + VLD1_TYPE_4_REGS = 0b0010 + }; + + enum VFloat_Arith_Size { + VFA_SIZE_F32 = 0b0, + VFA_SIZE_F64 = 0b1 + }; + +#define F(mnemonic, U, S, P) \ + void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + int size, int quad) { \ + assert(VM_Version::has_simd(), "simd instruction"); \ + assert(!(size == VFA_SIZE_F64 && !quad), "reserved"); \ + assert((size & 1) == size, 
"overflow"); \ + emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | \ + S << 23 | size << 22 | 1 << 21 | P << 11 | 1 << 10 | \ + fm->encoding() << 16 | \ + fn->encoding() << 5 | \ + fd->encoding()); \ + } + + F(vaddF, 0, 0, 0b11010) // Vd = Vn + Vm (float) + F(vsubF, 0, 1, 0b11010) // Vd = Vn - Vm (float) + F(vmulF, 1, 0, 0b11011) // Vd = Vn * Vm (float) + F(vdivF, 1, 0, 0b11111) // Vd = Vn / Vm (float) +#undef F + +#define F(mnemonic, U) \ + void mnemonic(FloatRegister fd, FloatRegister fm, FloatRegister fn, \ + int size, int quad) { \ + assert(VM_Version::has_simd(), "simd instruction"); \ + assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \ + assert((size & 0b11) == size, "overflow"); \ + int R = 0; /* rounding */ \ + int S = 0; /* saturating */ \ + emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \ + 1 << 21 | R << 12 | S << 11 | 0b10001 << 10 | \ + fm->encoding() << 16 | \ + fn->encoding() << 5 | \ + fd->encoding()); \ + } + + F(vshlSI, 0) // Vd = ashift(Vn,Vm) (int) + F(vshlUI, 1) // Vd = lshift(Vn,Vm) (int) +#undef F + +#define F(mnemonic, U, P, M) \ + void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + int size, int quad) { \ + assert(VM_Version::has_simd(), "simd instruction"); \ + assert(!(size == VELEM_SIZE_64 && !quad), "reserved"); \ + assert(!(size == VELEM_SIZE_64 && M), "reserved"); \ + assert((size & 0b11) == size, "overflow"); \ + emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | size << 22 | \ + 1 << 21 | P << 11 | 1 << 10 | \ + fm->encoding() << 16 | \ + fn->encoding() << 5 | \ + fd->encoding()); \ + } + + F(vmulI, 0, 0b10011, true) // Vd = Vn * Vm (int) + F(vaddI, 0, 0b10000, false) // Vd = Vn + Vm (int) + F(vsubI, 1, 0b10000, false) // Vd = Vn - Vm (int) +#undef F + +#define F(mnemonic, U, O) \ + void mnemonic(FloatRegister fd, FloatRegister fn, FloatRegister fm, \ + int quad) { \ + assert(VM_Version::has_simd(), "simd instruction"); \ + emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | O << 22 | \ + 1 << 
21 | 0b00011 << 11 | 1 << 10 | \ + fm->encoding() << 16 | \ + fn->encoding() << 5 | \ + fd->encoding()); \ + } + + F(vandI, 0, 0b00) // Vd = Vn & Vm (int) + F(vorI, 0, 0b10) // Vd = Vn | Vm (int) + F(vxorI, 1, 0b00) // Vd = Vn ^ Vm (int) +#undef F + + void vnegI(FloatRegister fd, FloatRegister fn, int size, int quad) { + int U = 1; + assert(VM_Version::has_simd(), "simd instruction"); + assert(quad || size != VELEM_SIZE_64, "reserved"); + emit_int32(quad << 30 | U << 29 | 0b01110 << 24 | + size << 22 | 0b100000101110 << 10 | + fn->encoding() << 5 | + fd->encoding() << 0); + } + + void vshli(FloatRegister fd, FloatRegister fn, int esize, int imm, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + + if (imm >= esize) { + // maximum shift gives all zeroes, direction doesn't matter, + // but only available for shift right + vshri(fd, fn, esize, esize, true /* unsigned */, quad); + return; + } + assert(imm >= 0 && imm < esize, "out of range"); + + int imm7 = esize + imm; + int immh = imm7 >> 3; + assert(immh != 0, "encoding constraint"); + assert((uint)immh < 16, "sanity"); + assert(((immh >> 2) | quad) != 0b10, "reserved"); + emit_int32(quad << 30 | 0b011110 << 23 | imm7 << 16 | + 0b010101 << 10 | fn->encoding() << 5 | fd->encoding() << 0); + } + + void vshri(FloatRegister fd, FloatRegister fn, int esize, int imm, + bool U /* unsigned */, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(imm > 0, "out of range"); + if (imm >= esize) { + // maximum shift (all zeroes) + imm = esize; + } + int imm7 = 2 * esize - imm ; + int immh = imm7 >> 3; + assert(immh != 0, "encoding constraint"); + assert((uint)immh < 16, "sanity"); + assert(((immh >> 2) | quad) != 0b10, "reserved"); + emit_int32(quad << 30 | U << 29 | 0b011110 << 23 | imm7 << 16 | + 0b000001 << 10 | fn->encoding() << 5 | fd->encoding() << 0); + } + void vshrUI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { + vshri(fd, fm, size, imm, true /* unsigned */, 
quad); + } + void vshrSI(FloatRegister fd, FloatRegister fm, int size, int imm, int quad) { + vshri(fd, fm, size, imm, false /* signed */, quad); + } + + void vld1(FloatRegister Vt, Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(addr.disp() == 0 || addr.disp() == 16, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 1; + int opcode = VLD1_TYPE_1_REG; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vst1(FloatRegister Vt, Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(addr.disp() == 0 || addr.disp() == 16, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 0; + int opcode = VLD1_TYPE_1_REG; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vld1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(Vt->successor() == Vt2, "Registers must be ordered"); + assert(addr.disp() == 0 || addr.disp() == 32, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 1; + int opcode = VLD1_TYPE_2_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vst1(FloatRegister Vt, FloatRegister Vt2, Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(Vt->successor() == Vt2, "Registers must be ordered"); + assert(bits == 128, "unsupported"); + assert(addr.disp() == 0 || addr.disp() == 32, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 0; + int opcode = VLD1_TYPE_2_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 
| opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, + Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, + "Registers must be ordered"); + assert(addr.disp() == 0 || addr.disp() == 48, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 1; + int opcode = VLD1_TYPE_3_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, + Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, + "Registers must be ordered"); + assert(addr.disp() == 0 || addr.disp() == 48, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 0; + int opcode = VLD1_TYPE_3_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vld1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, + FloatRegister Vt4, Address addr, VElem_Size size, int bits) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && + Vt3->successor() == Vt4, "Registers must be ordered"); + assert(addr.disp() == 0 || addr.disp() == 64, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 1; + int opcode = VLD1_TYPE_4_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void vst1(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, + FloatRegister Vt4, Address addr, VElem_Size size, int bits) { + 
assert(VM_Version::has_simd(), "simd instruction"); + assert(bits == 128, "unsupported"); + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && + Vt3->successor() == Vt4, "Registers must be ordered"); + assert(addr.disp() == 0 || addr.disp() == 64, "must be"); + int type = 0b11; // 2D + int quad = 1; + int L = 0; + int opcode = VLD1_TYPE_4_REGS; + emit_int32(quad << 30 | 0b11 << 26 | L << 22 | opcode << 12 | size << 10 | + Vt->encoding() << 0 | addr.encoding_simd()); + } + + void rev32(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(size == VELEM_SIZE_8 || size == VELEM_SIZE_16, "must be"); + emit_int32(quad << 30 | 0b101110 << 24 | size << 22 | + 0b100000000010 << 10 | Vn->encoding() << 5 | Vd->encoding()); + } + + void eor(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(size == VELEM_SIZE_8, "must be"); + emit_int32(quad << 30 | 0b101110001 << 21 | Vm->encoding() << 16 | + 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding()); + } + + void orr(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(size == VELEM_SIZE_8, "must be"); + emit_int32(quad << 30 | 0b001110101 << 21 | Vm->encoding() << 16 | + 0b000111 << 10 | Vn->encoding() << 5 | Vd->encoding()); + } + + void vmovI(FloatRegister Dd, int imm8, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(imm8 >= 0 && imm8 < 256, "out of range"); + int op; + int cmode; + switch (size) { + case VELEM_SIZE_8: + op = 0; + cmode = 0b1110; + break; + case VELEM_SIZE_16: + op = 0; + cmode = 0b1000; + break; + case VELEM_SIZE_32: + op = 0; + cmode = 0b0000; + break; + default: + cmode = 0; + ShouldNotReachHere(); + } + int abc = imm8 >> 5; + int defgh = imm8 & 0b11111; + emit_int32(quad << 30 | op << 29 | 
0b1111 << 24 | + abc << 16 | cmode << 12 | 0b01 << 10 | + defgh << 5 | Dd->encoding() << 0); + } + + void vdupI(FloatRegister Dd, Register Rn, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + assert(size <= 3, "unallocated encoding"); + assert(size != 3 || quad == 1, "reserved"); + int imm5 = 1 << size; +#ifdef ASSERT + switch (size) { + case VELEM_SIZE_8: + assert(imm5 == 0b00001, "sanity"); + break; + case VELEM_SIZE_16: + assert(imm5 == 0b00010, "sanity"); + break; + case VELEM_SIZE_32: + assert(imm5 == 0b00100, "sanity"); + break; + case VELEM_SIZE_64: + assert(imm5 == 0b01000, "sanity"); + break; + default: + ShouldNotReachHere(); + } +#endif + emit_int32(quad << 30 | 0b111 << 25 | 0b11 << 10 | + imm5 << 16 | Rn->encoding() << 5 | + Dd->encoding() << 0); + } + + void vdup(FloatRegister Vd, FloatRegister Vn, VElem_Size size, int quad) { + assert(VM_Version::has_simd(), "simd instruction"); + int index = 0; + int bytes = 1 << size; + int range = 16 / bytes; + assert(index < range, "overflow"); + + assert(size != VELEM_SIZE_64 || quad, "reserved"); + assert(8 << VELEM_SIZE_8 == 8, "sanity"); + assert(8 << VELEM_SIZE_16 == 16, "sanity"); + assert(8 << VELEM_SIZE_32 == 32, "sanity"); + assert(8 << VELEM_SIZE_64 == 64, "sanity"); + + int imm5 = (index << (size + 1)) | bytes; + + emit_int32(quad << 30 | 0b001110000 << 21 | imm5 << 16 | 0b000001 << 10 | + Vn->encoding() << 5 | Vd->encoding() << 0); + } + + void vdupF(FloatRegister Vd, FloatRegister Vn, int quad) { + vdup(Vd, Vn, VELEM_SIZE_32, quad); + } + + void vdupD(FloatRegister Vd, FloatRegister Vn, int quad) { + vdup(Vd, Vn, VELEM_SIZE_64, quad); + } +#endif +}; + + +#endif // CPU_ARM_VM_ASSEMBLER_ARM_64_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/bytes_arm.hpp 2016-12-02 11:17:46.451733060 -0500 @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_BYTES_ARM_HPP +#define CPU_ARM_VM_BYTES_ARM_HPP + +#include "memory/allocation.hpp" +#include "utilities/macros.hpp" + +#ifndef VM_LITTLE_ENDIAN +#define VM_LITTLE_ENDIAN 1 +#endif + +class Bytes: AllStatic { + + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. 
+ static inline bool is_Java_byte_ordering_different() { + return VM_LITTLE_ENDIAN != 0; + } + + static inline u2 get_Java_u2(address p) { + return (u2(p[0]) << 8) | u2(p[1]); + } + + static inline u4 get_Java_u4(address p) { + return u4(p[0]) << 24 | + u4(p[1]) << 16 | + u4(p[2]) << 8 | + u4(p[3]); + } + + static inline u8 get_Java_u8(address p) { + return u8(p[0]) << 56 | + u8(p[1]) << 48 | + u8(p[2]) << 40 | + u8(p[3]) << 32 | + u8(p[4]) << 24 | + u8(p[5]) << 16 | + u8(p[6]) << 8 | + u8(p[7]); + } + + static inline void put_Java_u2(address p, u2 x) { + p[0] = x >> 8; + p[1] = x; + } + + static inline void put_Java_u4(address p, u4 x) { + ((u1*)p)[0] = x >> 24; + ((u1*)p)[1] = x >> 16; + ((u1*)p)[2] = x >> 8; + ((u1*)p)[3] = x; + } + + static inline void put_Java_u8(address p, u8 x) { + ((u1*)p)[0] = x >> 56; + ((u1*)p)[1] = x >> 48; + ((u1*)p)[2] = x >> 40; + ((u1*)p)[3] = x >> 32; + ((u1*)p)[4] = x >> 24; + ((u1*)p)[5] = x >> 16; + ((u1*)p)[6] = x >> 8; + ((u1*)p)[7] = x; + } + +#ifdef VM_LITTLE_ENDIAN + + static inline u2 get_native_u2(address p) { + return (intptr_t(p) & 1) == 0 ? 
*(u2*)p : u2(p[0]) | (u2(p[1]) << 8); + } + + static inline u4 get_native_u4(address p) { + switch (intptr_t(p) & 3) { + case 0: return *(u4*)p; + case 2: return u4(((u2*)p)[0]) | + u4(((u2*)p)[1]) << 16; + default: return u4(p[0]) | + u4(p[1]) << 8 | + u4(p[2]) << 16 | + u4(p[3]) << 24; + } + } + + static inline u8 get_native_u8(address p) { + switch (intptr_t(p) & 7) { + case 0: return *(u8*)p; + case 4: return u8(((u4*)p)[0]) | + u8(((u4*)p)[1]) << 32; + case 2: return u8(((u2*)p)[0]) | + u8(((u2*)p)[1]) << 16 | + u8(((u2*)p)[2]) << 32 | + u8(((u2*)p)[3]) << 48; + default: return u8(p[0]) | + u8(p[1]) << 8 | + u8(p[2]) << 16 | + u8(p[3]) << 24 | + u8(p[4]) << 32 | + u8(p[5]) << 40 | + u8(p[6]) << 48 | + u8(p[7]) << 56; + } + } + + static inline void put_native_u2(address p, u2 x) { + if ((intptr_t(p) & 1) == 0) { + *(u2*)p = x; + } else { + p[0] = x; + p[1] = x >> 8; + } + } + + static inline void put_native_u4(address p, u4 x) { + switch (intptr_t(p) & 3) { + case 0: *(u4*)p = x; + break; + case 2: ((u2*)p)[0] = x; + ((u2*)p)[1] = x >> 16; + break; + default: ((u1*)p)[0] = x; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[3] = x >> 24; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + switch (intptr_t(p) & 7) { + case 0: *(u8*)p = x; + break; + case 4: ((u4*)p)[0] = x; + ((u4*)p)[1] = x >> 32; + break; + case 2: ((u2*)p)[0] = x; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[3] = x >> 48; + break; + default: ((u1*)p)[0] = x; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[7] = x >> 56; + } + } + +#else + + static inline u2 get_native_u2(address p) { return get_Java_u2(p); } + static inline u4 get_native_u4(address p) { return get_Java_u4(p); } + static inline u8 get_native_u8(address p) { return get_Java_u8(p); } + static inline void put_native_u2(address p, u2 x) { put_Java_u2(p, x); } + static 
inline void put_native_u4(address p, u4 x) { put_Java_u4(p, x); } + static inline void put_native_u8(address p, u8 x) { put_Java_u8(p, x); } + +#endif // VM_LITTLE_ENDIAN + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); + static inline u4 swap_u4(u4 x); + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8 +#include OS_CPU_HEADER_INLINE(bytes) + +#endif // CPU_ARM_VM_BYTES_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_CodeStubs_arm.cpp 2016-12-02 11:17:51.460017084 -0500 @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "nativeInst_arm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" +#include "vmreg_arm.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#endif // INCLUDE_ALL_GCS + +#define __ ce->masm()-> + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_bci, 0); + ce->store_parameter(_method->as_constant_ptr()->as_metadata(), 1); + __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + + __ b(_continuation); +} + + +// TODO: ARM - is it possible to inline these stubs into the main code stream? + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, + bool throw_index_out_of_bounds_exception) + : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) + , _index(index) +{ + _info = info == NULL ? 
NULL : new CodeEmitInfo(info); +} + + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + if (_info->deoptimize_on_exception()) { +#ifdef AARCH64 + __ NOT_TESTED(); +#endif + __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + // Pass the array index on stack because all registers must be preserved + ce->verify_reserved_argument_area_size(1); + if (_index->is_cpu_register()) { + __ str_32(_index->as_register(), Address(SP)); + } else { + __ mov_slow(Rtemp, _index->as_jint()); // Rtemp should be OK in C1 + __ str_32(Rtemp, Address(SP)); + } + + if (_throw_index_out_of_bounds_exception) { +#ifdef AARCH64 + __ NOT_TESTED(); +#endif + __ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type); + } else { + __ call(Runtime1::entry_for(Runtime1::throw_range_check_failed_id), relocInfo::runtime_call_type); + } + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + DEBUG_ONLY(STOP("RangeCheck");) +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::predicate_failed_trap_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), + relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(STOP("DivByZero");) +} + + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, 
ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(_result->as_register() == R0, "runtime call setup"); + assert(_klass_reg->as_register() == R1, "runtime call setup"); + __ bind(_entry); + __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(_result->as_register() == R0, "runtime call setup"); + assert(_klass_reg->as_register() == R1, "runtime call setup"); + assert(_length->as_register() == R2, "runtime call setup"); + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(_result->as_register() == R0, "runtime call setup"); + assert(_klass_reg->as_register() == R1, "runtime call setup"); + assert(_length->as_register() == R2, "runtime call setup"); + __ bind(_entry); + __ 
call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +: MonitorAccessStub(obj_reg, lock_reg) +{ + _info = new CodeEmitInfo(info); +} + + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + const Register obj_reg = _obj_reg->as_pointer_register(); + const Register lock_reg = _lock_reg->as_pointer_register(); + + ce->verify_reserved_argument_area_size(2); +#ifdef AARCH64 + __ stp(obj_reg, lock_reg, Address(SP)); +#else + if (obj_reg < lock_reg) { + __ stmia(SP, RegisterSet(obj_reg) | RegisterSet(lock_reg)); + } else { + __ str(obj_reg, Address(SP)); + __ str(lock_reg, Address(SP, BytesPerWord)); + } +#endif // AARCH64 + + Runtime1::StubID enter_id = ce->compilation()->has_fpu_code() ? + Runtime1::monitorenter_id : + Runtime1::monitorenter_nofpu_id; + __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + ce->monitor_address(_monitor_ix, _lock_reg); + } + const Register lock_reg = _lock_reg->as_pointer_register(); + + ce->verify_reserved_argument_area_size(1); + __ str(lock_reg, Address(SP)); + + // Non-blocking leaf routine - no call info needed + Runtime1::StubID exit_id = ce->compilation()->has_fpu_code() ? 
+ Runtime1::monitorexit_id : + Runtime1::monitorexit_nofpu_id; + __ call(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + + +// Call return is directly after patch word +int PatchingStub::_patch_info_offset = 0; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +#if 0 + // TODO: investigate whether we are required to implement this + ShouldNotReachHere(); +#endif +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + const int patchable_instruction_offset = AARCH64_ONLY(NativeInstruction::instruction_size) NOT_AARCH64(0); + + assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, + "not enough room for call"); + assert((_bytes_to_copy & 3) == 0, "must copy a multiple of four bytes"); + Label call_patch; + bool is_load = (_id == load_klass_id) || (_id == load_mirror_id) || (_id == load_appendix_id); + +#ifdef AARCH64 + assert(nativeInstruction_at(_pc_start)->is_nop(), "required for MT safe patching"); + + // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned. + __ align(wordSize); +#endif // AARCH64 + + if (is_load NOT_AARCH64(&& !VM_Version::supports_movw())) { + address start = __ pc(); + + // The following sequence duplicates code provided in MacroAssembler::patchable_mov_oop() + // without creating relocation info entry. +#ifdef AARCH64 + // Extra nop for MT safe patching + __ nop(); +#endif // AARCH64 + + assert((__ pc() - start) == patchable_instruction_offset, "should be"); +#ifdef AARCH64 + __ ldr(_obj, __ pc()); +#else + __ ldr(_obj, Address(PC)); + // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 
+ __ nop(); +#endif // AARCH64 + +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + assert(((address)_pc_start)[i] == start[i], "should be the same code"); + } +#endif // ASSERT + } + + address being_initialized_entry = __ pc(); + if (CommentedAssembly) { + __ block_comment(" patch template"); + } + if (is_load) { + address start = __ pc(); + if (_id == load_mirror_id || _id == load_appendix_id) { + __ patchable_mov_oop(_obj, (jobject)Universe::non_oop_word(), _index); + } else { + __ patchable_mov_metadata(_obj, (Metadata*)Universe::non_oop_word(), _index); + } +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + assert(((address)_pc_start)[i] == start[i], "should be the same code"); + } +#endif // ASSERT + } else { + int* start = (int*)_pc_start; + int* end = start + (_bytes_to_copy / BytesPerInt); + while (start < end) { + __ emit_int32(*start++); + } + } + address end_of_patch = __ pc(); + + int bytes_to_skip = 0; + if (_id == load_mirror_id) { + int offset = __ offset(); + if (CommentedAssembly) { + __ block_comment(" being_initialized check"); + } + + assert(_obj != noreg, "must be a valid register"); + // Rtemp should be OK in C1 + __ ldr(Rtemp, Address(_obj, java_lang_Class::klass_offset_in_bytes())); + __ ldr(Rtemp, Address(Rtemp, InstanceKlass::init_thread_offset())); + __ cmp(Rtemp, Rthread); + __ b(call_patch, ne); + __ b(_patch_site_continuation); + + bytes_to_skip += __ offset() - offset; + } + + if (CommentedAssembly) { + __ block_comment("patch data - 3 high bytes of the word"); + } + const int sizeof_patch_record = 4; + bytes_to_skip += sizeof_patch_record; + int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record; + __ emit_int32(0xff | being_initialized_entry_offset << 8 | bytes_to_skip << 16 | _bytes_to_copy << 24); + + address patch_info_pc = __ pc(); + assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info"); + + // runtime call will return here + Label call_return; 
+ __ bind(call_return); + ce->add_call_info_here(_info); + assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change"); + __ b(_patch_site_entry); + + address entry = __ pc(); + NativeGeneralJump::insert_unconditional((address)_pc_start, entry); + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + switch (_id) { + case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break; + case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break; + case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break; + case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break; + default: ShouldNotReachHere(); + } + __ bind(call_patch); + + if (CommentedAssembly) { + __ block_comment("patch entry point"); + } + + // arrange for call to return just after patch word + __ adr(LR, call_return); + __ jump(target, relocInfo::runtime_call_type, Rtemp); + + if (is_load) { + CodeSection* cs = __ code_section(); + address pc = (address)_pc_start; + RelocIterator iter(cs, pc, pc + 1); + relocInfo::change_reloc_info_for_address(&iter, pc, reloc_type, relocInfo::none); + } +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ mov_slow(Rtemp, _trap_request); + ce->verify_reserved_argument_area_size(1); + __ str(Rtemp, Address(SP)); + __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is + // probably wrong to do it here. 
+ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + DEBUG_ONLY(STOP("ImplicitNullCheck");) +} + + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + // Pass the object on stack because all registers must be preserved + if (_obj->is_cpu_register()) { + ce->verify_reserved_argument_area_size(1); + __ str(_obj->as_pointer_register(), Address(SP)); + } else { + assert(_obj->is_illegal(), "should be"); + } + __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(STOP("SimpleException");) +} + + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + Register r[5]; + r[0] = src()->as_pointer_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_pointer_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + for (int i = 0; i < 5; i++) { + VMReg arg = args[i].first(); + if (arg->is_stack()) { + __ str(r[i], Address(SP, arg->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + assert(r[i] == arg->as_Register(), "Calling conventions must match"); + } + } + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + int ret_addr_offset = __ patchable_call(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type); + assert(ret_addr_offset == __ offset(), "embedded return address not allowed"); + ce->add_call_info_here(info()); + ce->verify_oop_map(info()); + __ b(_continuation); +} + 
+///////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + if (do_load()) { + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); + } + + __ cbz(pre_val_reg, _continuation); + ce->verify_reserved_argument_area_size(1); + __ str(pre_val_reg, Address(SP)); + __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type); + + __ b(_continuation); +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cbz(new_val_reg, _continuation); + ce->verify_reserved_argument_area_size(1); + __ str(addr()->as_pointer_register(), Address(SP)); + __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + +#endif // INCLUDE_ALL_GCS +///////////////////////////////////////////////////////////////////////////// + +#undef __ --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_Defs_arm.hpp 2016-12-02 11:17:57.044333770 -0500 @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C1_DEFS_ARM_HPP +#define CPU_ARM_VM_C1_DEFS_ARM_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +#ifdef __SOFTFP__ +#define SOFT(n) n +#define VFP(n) +#else // __SOFTFP__ +#define SOFT(n) +#define VFP(n) n +#endif // __SOFTFP__ + + +// registers +enum { + pd_nof_cpu_regs_frame_map = AARCH64_ONLY(33) NOT_AARCH64(16), // number of registers used during code emission + pd_nof_caller_save_cpu_regs_frame_map = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers killed by calls + pd_nof_cpu_regs_reg_alloc = AARCH64_ONLY(27) NOT_AARCH64(10), // number of registers that are visible to register allocator (including Rheap_base which is visible only if compressed pointers are not enabled) + pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map, // number of registers visible to linear scan + pd_nof_cpu_regs_processed_in_linearscan = pd_nof_cpu_regs_reg_alloc + 1, // number of registers processed in linear scan; includes LR as it is used as temporary register in c1_LIRGenerator_arm + 
pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_frame_map - 1, + + pd_nof_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers used during code emission + pd_nof_caller_save_fpu_regs_frame_map = VFP(32) SOFT(0), // number of float registers killed by calls + pd_nof_fpu_regs_reg_alloc = AARCH64_ONLY(32) NOT_AARCH64(VFP(30) SOFT(0)), // number of float registers that are visible to register allocator + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1, + + pd_nof_xmm_regs_linearscan = 0, + pd_nof_caller_save_xmm_regs = 0, + pd_first_xmm_reg = -1, + pd_last_xmm_reg = -1 +}; + + +// encoding of float value in debug info: +enum { + pd_float_saved_as_double = false +}; + +#ifdef AARCH64 +#define PATCHED_ADDR 0xff8 +#else +#define PATCHED_ADDR (204) +#endif +#define CARDTABLEMODREF_POST_BARRIER_HELPER +#define GENERATE_ADDRESS_IS_PREFERRED + +#endif // CPU_ARM_VM_C1_DEFS_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_FpuStackSim_arm.cpp 2016-12-02 11:18:02.372635941 -0500 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FpuStackSim.hpp" +#include "c1/c1_FrameMap.hpp" +#include "utilities/array.hpp" +#include "utilities/ostream.hpp" + +// Nothing needed here --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_FpuStackSim_arm.hpp 2016-12-02 11:18:07.816944689 -0500 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP +#define CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP + +// Nothing needed here + +#endif // CPU_ARM_VM_C1_FPUSTACKSIM_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_FrameMap_arm.cpp 2016-12-02 11:18:13.185249126 -0500 @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_arm.inline.hpp" + +LIR_Opr FrameMap::R0_opr; +LIR_Opr FrameMap::R1_opr; +LIR_Opr FrameMap::R2_opr; +LIR_Opr FrameMap::R3_opr; +LIR_Opr FrameMap::R4_opr; +LIR_Opr FrameMap::R5_opr; + +LIR_Opr FrameMap::R0_oop_opr; +LIR_Opr FrameMap::R1_oop_opr; +LIR_Opr FrameMap::R2_oop_opr; +LIR_Opr FrameMap::R3_oop_opr; +LIR_Opr FrameMap::R4_oop_opr; +LIR_Opr FrameMap::R5_oop_opr; + +LIR_Opr FrameMap::R0_metadata_opr; +LIR_Opr FrameMap::R1_metadata_opr; +LIR_Opr FrameMap::R2_metadata_opr; +LIR_Opr FrameMap::R3_metadata_opr; +LIR_Opr FrameMap::R4_metadata_opr; +LIR_Opr FrameMap::R5_metadata_opr; + +#ifdef AARCH64 +LIR_Opr FrameMap::ZR_opr; +#endif // AARCH64 + +LIR_Opr FrameMap::LR_opr; +LIR_Opr FrameMap::LR_oop_opr; +LIR_Opr FrameMap::LR_ptr_opr; +LIR_Opr FrameMap::FP_opr; +LIR_Opr FrameMap::SP_opr; +LIR_Opr FrameMap::Rthread_opr; + +LIR_Opr FrameMap::Int_result_opr; +LIR_Opr FrameMap::Long_result_opr; +LIR_Opr FrameMap::Object_result_opr; +LIR_Opr FrameMap::Float_result_opr; +LIR_Opr FrameMap::Double_result_opr; + +LIR_Opr FrameMap::Exception_oop_opr; +LIR_Opr FrameMap::Exception_pc_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; +LIR_Opr FrameMap::_caller_save_fpu_regs[]; // same as initialize to zero + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(SP_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { +#ifdef AARCH64 + assert(r_1->next() == r_2, "should be the same"); + opr = as_long_opr(reg); +#else + opr = 
as_long_opr(reg, r_2->as_Register()); +#endif + } else if (type == T_OBJECT || type == T_ARRAY) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else { + // PreferInterpreterNativeStubs should ensure we never need to + // handle a long opr passed as R3+stack_slot + assert(! r_2->is_stack(), "missing support for ALIGN_WIDE_ARGUMENTS==0"); + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + FloatRegister reg = r_1->as_FloatRegister(); + opr = type == T_FLOAT ? as_float_opr(reg) : as_double_opr(reg); + } else { + ShouldNotReachHere(); + } + return opr; +} + + +void FrameMap::initialize() { + if (_init_done) return; + + int i; + int rnum = 0; + + // Registers used for allocation +#ifdef AARCH64 + assert(Rthread == R28 && Rheap_base == R27 && Rtemp == R16, "change the code here"); + for (i = 0; i < 16; i++) { + map_register(rnum++, as_Register(i)); + } + for (i = 17; i < 28; i++) { + map_register(rnum++, as_Register(i)); + } +#else + assert(Rthread == R10 && Rtemp == R12, "change the code here"); + for (i = 0; i < 10; i++) { + map_register(rnum++, as_Register(i)); + } +#endif // AARCH64 + assert(rnum == pd_nof_cpu_regs_reg_alloc, "should be"); + + // Registers not used for allocation + map_register(rnum++, LR); // LR register should be listed first, see c1_LinearScan_arm.hpp::is_processed_reg_num. 
+ assert(rnum == pd_nof_cpu_regs_processed_in_linearscan, "should be"); + + map_register(rnum++, Rtemp); + map_register(rnum++, Rthread); + map_register(rnum++, FP); // ARM32: R7 or R11 + map_register(rnum++, SP); +#ifdef AARCH64 + map_register(rnum++, ZR); +#else + map_register(rnum++, PC); +#endif + assert(rnum == pd_nof_cpu_regs_frame_map, "should be"); + + _init_done = true; + + R0_opr = as_opr(R0); R0_oop_opr = as_oop_opr(R0); R0_metadata_opr = as_metadata_opr(R0); + R1_opr = as_opr(R1); R1_oop_opr = as_oop_opr(R1); R1_metadata_opr = as_metadata_opr(R1); + R2_opr = as_opr(R2); R2_oop_opr = as_oop_opr(R2); R2_metadata_opr = as_metadata_opr(R2); + R3_opr = as_opr(R3); R3_oop_opr = as_oop_opr(R3); R3_metadata_opr = as_metadata_opr(R3); + R4_opr = as_opr(R4); R4_oop_opr = as_oop_opr(R4); R4_metadata_opr = as_metadata_opr(R4); + R5_opr = as_opr(R5); R5_oop_opr = as_oop_opr(R5); R5_metadata_opr = as_metadata_opr(R5); + +#ifdef AARCH64 + ZR_opr = as_opr(ZR); +#endif // AARCH64 + + LR_opr = as_opr(LR); + LR_oop_opr = as_oop_opr(LR); + LR_ptr_opr = as_pointer_opr(LR); + FP_opr = as_pointer_opr(FP); + SP_opr = as_pointer_opr(SP); + Rthread_opr = as_pointer_opr(Rthread); + + // LIR operands for result + Int_result_opr = R0_opr; + Object_result_opr = R0_oop_opr; +#ifdef AARCH64 + Long_result_opr = as_long_opr(R0); + Float_result_opr = as_float_opr(S0); + Double_result_opr = as_double_opr(D0); +#else + Long_result_opr = as_long_opr(R0, R1); +#ifdef __ABI_HARD__ + Float_result_opr = as_float_opr(S0); + Double_result_opr = as_double_opr(D0); +#else + Float_result_opr = LIR_OprFact::single_softfp(0); + Double_result_opr = LIR_OprFact::double_softfp(0, 1); +#endif // __ABI_HARD__ +#endif // AARCH64 + + Exception_oop_opr = as_oop_opr(Rexception_obj); + Exception_pc_opr = as_opr(Rexception_pc); + + for (i = 0; i < nof_caller_save_cpu_regs(); i++) { + _caller_save_cpu_regs[i] = LIR_OprFact::single_cpu(i); + } + for (i = 0; i < nof_caller_save_fpu_regs; i++) { + 
_caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(SP, sp_offset); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::SP_opr; +} + +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + assert(Rmh_SP_save == FP, "Fix register used for saving SP for MethodHandle calls"); + return FP_opr; +} + +bool FrameMap::validate_frame() { + int max_offset = in_bytes(framesize_in_bytes()); + int java_index = 0; + for (int i = 0; i < _incoming_arguments->length(); i++) { + LIR_Opr opr = _incoming_arguments->at(i); + if (opr->is_stack()) { + int arg_offset = _argument_locations->at(java_index); + if (arg_offset > max_offset) { + max_offset = arg_offset; + } + } + java_index += type2size[opr->type()]; + } + return max_offset < AARCH64_ONLY(16384) NOT_AARCH64(4096); // TODO-AARCH64 check that LIRAssembler does not generate load/store of byte and half-word with SP as address base +} + +VMReg FrameMap::fpu_regname(int n) { + return as_FloatRegister(n)->as_VMReg(); +} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_FrameMap_arm.hpp 2016-12-02 11:18:18.141530198 -0500 @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP +#define CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP + + public: + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 2*wordSize // Account for FP/LR saved at build_frame(). + }; + + static LIR_Opr R0_opr; + static LIR_Opr R1_opr; + static LIR_Opr R2_opr; + static LIR_Opr R3_opr; + static LIR_Opr R4_opr; + static LIR_Opr R5_opr; + // add more predefined register oprs as needed + + static LIR_Opr R0_oop_opr; + static LIR_Opr R1_oop_opr; + static LIR_Opr R2_oop_opr; + static LIR_Opr R3_oop_opr; + static LIR_Opr R4_oop_opr; + static LIR_Opr R5_oop_opr; + + static LIR_Opr R0_metadata_opr; + static LIR_Opr R1_metadata_opr; + static LIR_Opr R2_metadata_opr; + static LIR_Opr R3_metadata_opr; + static LIR_Opr R4_metadata_opr; + static LIR_Opr R5_metadata_opr; + +#ifdef AARCH64 + static LIR_Opr ZR_opr; +#endif // AARCH64 + + static LIR_Opr LR_opr; + static LIR_Opr LR_oop_opr; + static LIR_Opr LR_ptr_opr; + + static LIR_Opr FP_opr; + static LIR_Opr SP_opr; + static LIR_Opr Rthread_opr; + + static LIR_Opr Int_result_opr; + static LIR_Opr Long_result_opr; + static LIR_Opr Object_result_opr; + static LIR_Opr Float_result_opr; + static LIR_Opr Double_result_opr; + + static LIR_Opr Exception_oop_opr; + static LIR_Opr Exception_pc_opr; + +#ifdef AARCH64 + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + static LIR_Opr as_double_opr(FloatRegister r) { + return 
LIR_OprFact::double_fpu(r->encoding()); + } +#else + static LIR_Opr as_long_opr(Register r, Register r2) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2)); + } + + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::single_cpu(cpu_reg2rnr(r)); + } + + static LIR_Opr as_double_opr(FloatRegister r) { + return LIR_OprFact::double_fpu(r->encoding(), r->successor()->encoding()); + } +#endif + + static LIR_Opr as_float_opr(FloatRegister r) { + return LIR_OprFact::single_fpu(r->encoding()); + } + + static VMReg fpu_regname(int n); + + static bool is_caller_save_register(LIR_Opr opr) { + return true; + } + + static int adjust_reg_range(int range) { + // Reduce the number of available regs (to free Rheap_base) in case of compressed oops + if (UseCompressedOops || UseCompressedClassPointers) return range - 1; + return range; + } + + static int nof_caller_save_cpu_regs() { + return adjust_reg_range(pd_nof_caller_save_cpu_regs_frame_map); + } + + static int last_cpu_reg() { + return pd_last_cpu_reg; + } + +#endif // CPU_ARM_VM_C1_FRAMEMAP_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LIRAssembler_arm.cpp 2016-12-02 11:18:23.561837584 -0500 @@ -0,0 +1,3610 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_arm.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_arm.inline.hpp" + +#define __ _masm-> + +// Note: Rtemp usage in this file should not impact C2 and should be +// correct as long as it is not implicitly used in lower layers (the +// arm [macro]assembler) and used with care in the other C1 specific +// files. 
+ +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { + ShouldNotCallThis(); // Not used on ARM + return false; +} + + +LIR_Opr LIR_Assembler::receiverOpr() { + // The first register in Java calling conventions + return FrameMap::R0_oop_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(R0); +} + +#ifndef PRODUCT +void LIR_Assembler::verify_reserved_argument_area_size(int args_count) { + assert(args_count * wordSize <= frame_map()->reserved_argument_area_size(), "not enough space for arguments"); +} +#endif // !PRODUCT + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "not enough space"); + __ mov_slow(Rtemp, c); + __ str(Rtemp, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(Metadata* m, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "not enough space"); + __ mov_metadata(Rtemp, m); + __ str(Rtemp, Address(SP, offset_from_sp_in_bytes)); +} + +//--------------fpu register translations----------------------- + + +void LIR_Assembler::set_24bit_FPU() { + ShouldNotReachHere(); +} + +void LIR_Assembler::reset_FPU() { + ShouldNotReachHere(); +} + +void LIR_Assembler::fpop() { + Unimplemented(); +} + +void LIR_Assembler::fxch(int i) { + Unimplemented(); +} + +void LIR_Assembler::fld(int i) { + Unimplemented(); +} + +void LIR_Assembler::ffree(int i) { + Unimplemented(); +} + +void LIR_Assembler::breakpoint() { + __ breakpoint(); +} + +void LIR_Assembler::push(LIR_Opr opr) { + Unimplemented(); +} + +void LIR_Assembler::pop(LIR_Opr opr) { + Unimplemented(); +} + 
+//------------------------------------------- +Address LIR_Assembler::as_Address(LIR_Address* addr) { + Register base = addr->base()->as_pointer_register(); + +#ifdef AARCH64 + int align = exact_log2(type2aelembytes(addr->type(), true)); +#endif + + if (addr->index()->is_illegal() || addr->index()->is_constant()) { + int offset = addr->disp(); + if (addr->index()->is_constant()) { + offset += addr->index()->as_constant_ptr()->as_jint() << addr->scale(); + } + +#ifdef AARCH64 + if (!Assembler::is_unsigned_imm_in_range(offset, 12, align) && !Assembler::is_imm_in_range(offset, 9, 0)) { + BAILOUT_("offset not in range", Address(base)); + } + assert(UseUnalignedAccesses || (offset & right_n_bits(align)) == 0, "offset should be aligned"); +#else + if ((offset <= -4096) || (offset >= 4096)) { + BAILOUT_("offset not in range", Address(base)); + } +#endif // AARCH64 + + return Address(base, offset); + + } else { + assert(addr->disp() == 0, "can't have both"); + int scale = addr->scale(); + +#ifdef AARCH64 + assert((scale == 0) || (scale == align), "scale should be zero or equal to embedded shift"); + + bool is_index_extended = (addr->index()->type() == T_INT); + if (is_index_extended) { + assert(addr->index()->is_single_cpu(), "should be"); + return Address(base, addr->index()->as_register(), ex_sxtw, scale); + } else { + assert(addr->index()->is_double_cpu(), "should be"); + return Address(base, addr->index()->as_register_lo(), ex_lsl, scale); + } +#else + assert(addr->index()->is_single_cpu(), "should be"); + return scale >= 0 ? 
Address(base, addr->index()->as_register(), lsl, scale) : + Address(base, addr->index()->as_register(), lsr, -scale); +#endif // AARCH64 + } +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { +#ifdef AARCH64 + ShouldNotCallThis(); // Not used on AArch64 + return Address(); +#else + Address base = as_Address(addr); + assert(base.index() == noreg, "must be"); + if (base.disp() + BytesPerWord >= 4096) { BAILOUT_("offset not in range", Address(base.base(),0)); } + return Address(base.base(), base.disp() + BytesPerWord); +#endif // AARCH64 +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { +#ifdef AARCH64 + ShouldNotCallThis(); // Not used on AArch64 + return Address(); +#else + return as_Address(addr); +#endif // AARCH64 +} + + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->end()->state(); + int number_of_locks = entry_state->locks_size(); + + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = (method()->max_locals() + 2 * (number_of_locks - 1)) * BytesPerWord; + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - (i * 2 * BytesPerWord); + __ ldr(R1, Address(OSR_buf, slot_offset + 0*BytesPerWord)); + __ ldr(R2, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ str(R1, frame_map()->address_for_monitor_lock(i)); + __ str(R2, frame_map()->address_for_monitor_object(i)); + } +} + + +int LIR_Assembler::check_icache() { + Register receiver = LIR_Assembler::receiverOpr()->as_register(); + int offset = __ offset(); + __ inline_cache_check(receiver, Ricklass); + return offset; +} + + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) { + 
jobject o = (jobject)Universe::non_oop_word(); + int index = __ oop_recorder()->allocate_oop_index(o); + + PatchingStub* patch = new PatchingStub(_masm, patching_id(info), index); + + __ patchable_mov_oop(reg, o, index); + patching_epilog(patch, lir_patch_normal, reg, info); +} + + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + Metadata* o = (Metadata*)Universe::non_oop_word(); + int index = __ oop_recorder()->allocate_metadata_index(o); + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index); + + __ patchable_mov_metadata(reg, o, index); + patching_epilog(patch, lir_patch_normal, reg, info); +} + + +int LIR_Assembler::initial_frame_size_in_bytes() const { + // Subtracts two words to account for return address and link + return frame_map()->framesize()*VMRegImpl::stack_slot_size - 2*wordSize; +} + + +int LIR_Assembler::emit_exception_handler() { + // TODO: ARM + __ nop(); // See comments in other ports + + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // check that there is really an exception + __ verify_not_null_oop(Rexception_obj); + + __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); + __ should_not_reach_here(); + + assert(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception +// unwind path. 
+int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + Register zero = __ zero_register(Rtemp); + __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); + __ str(zero, Address(Rthread, JavaThread::exception_oop_offset())); + __ str(zero, Address(Rthread, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(Rexception_obj); + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::R0_opr); + stub = new MonitorExitStub(FrameMap::R0_opr, true, 0); + __ unlock_object(R2, R1, R0, Rtemp, *stub->entry()); + __ bind(*stub->continuation()); + } + + // remove the activation and dispatch to the unwind handler + __ remove_frame(initial_frame_size_in_bytes()); // restores FP and LR + __ jump(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type, Rtemp); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + + +int LIR_Assembler::emit_deopt_handler() { + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ mov_relative_address(LR, __ pc()); +#ifdef AARCH64 + __ raw_push(LR, LR); + __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, Rtemp); +#else + __ push(LR); // stub expects LR to be saved + __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, noreg); +#endif // AARCH64 + + assert(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + + +void LIR_Assembler::return_op(LIR_Opr result) { + // Pop the frame before safepoint polling + __ 
remove_frame(initial_frame_size_in_bytes()); + + // mov_slow here is usually one or two instructions + // TODO-AARCH64 3 instructions on AArch64, so try to load polling page by ldr_literal + __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference); + __ relocate(relocInfo::poll_return_type); + __ ldr(Rtemp, Address(Rtemp)); + __ ret(); +} + + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + __ mov_address(Rtemp, os::get_polling_page(), symbolic_Relocation::polling_page_reference); + if (info != NULL) { + add_debug_info_for_branch(info); + } + int offset = __ offset(); + __ relocate(relocInfo::poll_type); + __ ldr(Rtemp, Address(Rtemp)); + return offset; +} + + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + if (from_reg != to_reg) { + __ mov(to_reg, from_reg); + } +} + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant() && dest->is_register(), "must be"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_ADDRESS: + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov_slow(dest->as_register(), c->as_jint()); + break; + + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); +#ifdef AARCH64 + __ mov_slow(dest->as_pointer_register(), (intptr_t)c->as_jlong()); +#else + __ mov_slow(dest->as_register_lo(), c->as_jint_lo()); + __ mov_slow(dest->as_register_hi(), c->as_jint_hi()); +#endif // AARCH64 + break; + + case T_OBJECT: + if (patch_code == lir_patch_none) { + __ mov_oop(dest->as_register(), c->as_jobject()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + + case T_METADATA: + if (patch_code == lir_patch_none) { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } else { + klass2reg_with_patching(dest->as_register(), info); + } + break; + + case T_FLOAT: + if 
(dest->is_single_fpu()) { + __ mov_float(dest->as_float_reg(), c->as_jfloat()); + } else { +#ifdef AARCH64 + ShouldNotReachHere(); +#else + // Simple getters can return float constant directly into r0 + __ mov_slow(dest->as_register(), c->as_jint_bits()); +#endif // AARCH64 + } + break; + + case T_DOUBLE: + if (dest->is_double_fpu()) { + __ mov_double(dest->as_double_reg(), c->as_jdouble()); + } else { +#ifdef AARCH64 + ShouldNotReachHere(); +#else + // Simple getters can return double constant directly into r1r0 + __ mov_slow(dest->as_register_lo(), c->as_jint_lo_bits()); + __ mov_slow(dest->as_register_hi(), c->as_jint_hi_bits()); +#endif // AARCH64 + } + break; + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + assert(src->is_constant(), "must be"); + assert(dest->is_stack(), "must be"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: // fall through + case T_FLOAT: + __ mov_slow(Rtemp, c->as_jint_bits()); + __ str_32(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix())); + break; + + case T_ADDRESS: + __ mov_slow(Rtemp, c->as_jint()); + __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix())); + break; + + case T_OBJECT: + __ mov_oop(Rtemp, c->as_jobject()); + __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix())); + break; + + case T_LONG: // fall through + case T_DOUBLE: +#ifdef AARCH64 + __ mov_slow(Rtemp, c->as_jlong_bits()); + __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix())); +#else + __ mov_slow(Rtemp, c->as_jint_lo_bits()); + __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes)); + if (c->as_jint_hi_bits() != c->as_jint_lo_bits()) { + __ mov_slow(Rtemp, c->as_jint_hi_bits()); + } + __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); +#endif // AARCH64 + break; + + default: + ShouldNotReachHere(); + } +} + +void 
LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info, bool wide) { +#ifdef AARCH64 + assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL) || + (src->as_constant_ptr()->type() == T_INT && src->as_constant_ptr()->as_jint() == 0) || + (src->as_constant_ptr()->type() == T_LONG && src->as_constant_ptr()->as_jlong() == 0) || + (src->as_constant_ptr()->type() == T_FLOAT && src->as_constant_ptr()->as_jint_bits() == 0) || + (src->as_constant_ptr()->type() == T_DOUBLE && src->as_constant_ptr()->as_jlong_bits() == 0), + "cannot handle otherwise"); + assert(dest->as_address_ptr()->type() == type, "should be"); + + Address addr = as_Address(dest->as_address_ptr()); + int null_check_offset = code_offset(); + switch (type) { + case T_OBJECT: // fall through + case T_ARRAY: + if (UseCompressedOops && !wide) { + __ str_w(ZR, addr); + } else { + __ str(ZR, addr); + } + break; + case T_ADDRESS: // fall through + case T_DOUBLE: // fall through + case T_LONG: __ str(ZR, addr); break; + case T_FLOAT: // fall through + case T_INT: __ str_w(ZR, addr); break; + case T_BOOLEAN: // fall through + case T_BYTE: __ strb(ZR, addr); break; + case T_CHAR: // fall through + case T_SHORT: __ strh(ZR, addr); break; + default: ShouldNotReachHere(); + } +#else + assert((src->as_constant_ptr()->type() == T_OBJECT && src->as_constant_ptr()->as_jobject() == NULL),"cannot handle otherwise"); + __ mov(Rtemp, 0); + + int null_check_offset = code_offset(); + __ str(Rtemp, as_Address(dest->as_address_ptr())); +#endif // AARCH64 + + if (info != NULL) { +#ifndef AARCH64 + assert(false, "arm32 didn't support this before, investigate if bug"); +#endif + add_debug_info_for_null_check(null_check_offset, info); + } +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register() && dest->is_register(), "must be"); + + if (src->is_single_cpu()) { + if (dest->is_single_cpu()) { + move_regs(src->as_register(), 
dest->as_register()); +#ifdef AARCH64 + } else if (dest->is_double_cpu()) { + assert ((src->type() == T_OBJECT) || (src->type() == T_ARRAY) || (src->type() == T_ADDRESS), "invalid src type"); + move_regs(src->as_register(), dest->as_register_lo()); +#else + } else if (dest->is_single_fpu()) { + __ fmsr(dest->as_float_reg(), src->as_register()); +#endif // AARCH64 + } else { + ShouldNotReachHere(); + } + } else if (src->is_double_cpu()) { +#ifdef AARCH64 + move_regs(src->as_register_lo(), dest->as_register_lo()); +#else + if (dest->is_double_cpu()) { + __ long_move(dest->as_register_lo(), dest->as_register_hi(), src->as_register_lo(), src->as_register_hi()); + } else { + __ fmdrr(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi()); + } +#endif // AARCH64 + } else if (src->is_single_fpu()) { + if (dest->is_single_fpu()) { + __ mov_float(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_single_cpu()) { + __ mov_fpr2gpr_float(dest->as_register(), src->as_float_reg()); + } else { + ShouldNotReachHere(); + } + } else if (src->is_double_fpu()) { + if (dest->is_double_fpu()) { + __ mov_double(dest->as_double_reg(), src->as_double_reg()); + } else if (dest->is_double_cpu()) { +#ifdef AARCH64 + __ fmov_xd(dest->as_register_lo(), src->as_double_reg()); +#else + __ fmrrd(dest->as_register_lo(), dest->as_register_hi(), src->as_double_reg()); +#endif // AARCH64 + } else { + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + + Address addr = dest->is_single_word() ? 
+ frame_map()->address_for_slot(dest->single_stack_ix()) : + frame_map()->address_for_slot(dest->double_stack_ix()); + +#ifndef AARCH64 + assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending"); + if (src->is_single_fpu() || src->is_double_fpu()) { + if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } + } +#endif // !AARCH64 + + if (src->is_single_cpu()) { + switch (type) { + case T_OBJECT: + case T_ARRAY: __ verify_oop(src->as_register()); // fall through + case T_ADDRESS: + case T_METADATA: __ str(src->as_register(), addr); break; + case T_FLOAT: // used in intBitsToFloat intrinsic implementation, fall through + case T_INT: __ str_32(src->as_register(), addr); break; + default: + ShouldNotReachHere(); + } + } else if (src->is_double_cpu()) { + __ str(src->as_register_lo(), addr); +#ifndef AARCH64 + __ str(src->as_register_hi(), frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); +#endif // !AARCH64 + } else if (src->is_single_fpu()) { + __ str_float(src->as_float_reg(), addr); + } else if (src->is_double_fpu()) { + __ str_double(src->as_double_reg(), addr); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, + LIR_PatchCode patch_code, CodeEmitInfo* info, + bool pop_fpu_stack, bool wide, + bool unaligned) { + LIR_Address* to_addr = dest->as_address_ptr(); + Register base_reg = to_addr->base()->as_pointer_register(); + const bool needs_patching = (patch_code != lir_patch_none); + + PatchingStub* patch = NULL; + if (needs_patching) { +#ifdef AARCH64 + // Same alignment of reg2mem code and PatchingStub code. Required to make copied bind_literal() code properly aligned. 
+ __ align(wordSize); +#endif + patch = new PatchingStub(_masm, PatchingStub::access_field_id); +#ifdef AARCH64 + // Extra nop for MT safe patching + __ nop(); +#endif // AARCH64 + } + + int null_check_offset = code_offset(); + + switch (type) { + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { +#ifdef AARCH64 + const Register temp_src = Rtemp; + assert_different_registers(temp_src, src->as_register()); + __ encode_heap_oop(temp_src, src->as_register()); + null_check_offset = code_offset(); + __ str_32(temp_src, as_Address(to_addr)); +#else + ShouldNotReachHere(); +#endif // AARCH64 + } else { + __ str(src->as_register(), as_Address(to_addr)); + } + break; + + case T_ADDRESS: +#ifdef AARCH64 + case T_LONG: +#endif // AARCH64 + __ str(src->as_pointer_register(), as_Address(to_addr)); + break; + + case T_BYTE: + case T_BOOLEAN: + __ strb(src->as_register(), as_Address(to_addr)); + break; + + case T_CHAR: + case T_SHORT: + __ strh(src->as_register(), as_Address(to_addr)); + break; + + case T_INT: +#ifdef __SOFTFP__ + case T_FLOAT: +#endif // __SOFTFP__ + __ str_32(src->as_register(), as_Address(to_addr)); + break; + +#ifdef AARCH64 + + case T_FLOAT: + __ str_s(src->as_float_reg(), as_Address(to_addr)); + break; + + case T_DOUBLE: + __ str_d(src->as_double_reg(), as_Address(to_addr)); + break; + +#else // AARCH64 + +#ifdef __SOFTFP__ + case T_DOUBLE: +#endif // __SOFTFP__ + case T_LONG: { + Register from_lo = src->as_register_lo(); + Register from_hi = src->as_register_hi(); + if (to_addr->index()->is_register()) { + assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + assert(to_addr->disp() == 0, "Not yet supporting both"); + __ add(Rtemp, base_reg, to_addr->index()->as_register()); + base_reg = Rtemp; + __ str(from_lo, Address(Rtemp)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_low, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_high; + } + __ 
str(from_hi, Address(Rtemp, BytesPerWord)); + } else if (base_reg == from_lo) { + __ str(from_hi, as_Address_hi(to_addr)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_high, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_low; + } + __ str(from_lo, as_Address_lo(to_addr)); + } else { + __ str(from_lo, as_Address_lo(to_addr)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_low, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_high; + } + __ str(from_hi, as_Address_hi(to_addr)); + } + break; + } + +#ifndef __SOFTFP__ + case T_FLOAT: + if (to_addr->index()->is_register()) { + assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + __ add(Rtemp, base_reg, to_addr->index()->as_register()); + if ((to_addr->disp() <= -4096) || (to_addr->disp() >= 4096)) { BAILOUT("offset not in range"); } + __ fsts(src->as_float_reg(), Address(Rtemp, to_addr->disp())); + } else { + __ fsts(src->as_float_reg(), as_Address(to_addr)); + } + break; + + case T_DOUBLE: + if (to_addr->index()->is_register()) { + assert(to_addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + __ add(Rtemp, base_reg, to_addr->index()->as_register()); + if ((to_addr->disp() <= -4096) || (to_addr->disp() >= 4096)) { BAILOUT("offset not in range"); } + __ fstd(src->as_double_reg(), Address(Rtemp, to_addr->disp())); + } else { + __ fstd(src->as_double_reg(), as_Address(to_addr)); + } + break; +#endif // __SOFTFP__ + +#endif // AARCH64 + + default: + ShouldNotReachHere(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_offset, info); + } + + if (patch != NULL) { + // The offset embedded in the LDR/STR instruction may not be large enough + // to address a field, so reserve space for one more instruction + // that will deal with larger offsets.
+ __ nop(); + patching_epilog(patch, patch_code, base_reg, info); + } +} + + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + assert(src->is_stack(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + Address addr = src->is_single_word() ? + frame_map()->address_for_slot(src->single_stack_ix()) : + frame_map()->address_for_slot(src->double_stack_ix()); + +#ifndef AARCH64 + assert(lo_word_offset_in_bytes == 0 && hi_word_offset_in_bytes == 4, "little ending"); + if (dest->is_single_fpu() || dest->is_double_fpu()) { + if (addr.disp() >= 1024) { BAILOUT("Too exotic case to handle here"); } + } +#endif // !AARCH64 + + if (dest->is_single_cpu()) { + switch (type) { + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: __ ldr(dest->as_register(), addr); break; + case T_FLOAT: // used in floatToRawIntBits intrinsic implementation, fall through + case T_INT: __ ldr_u32(dest->as_register(), addr); break; + default: + ShouldNotReachHere(); + } + if ((type == T_OBJECT) || (type == T_ARRAY)) { + __ verify_oop(dest->as_register()); + } + } else if (dest->is_double_cpu()) { + __ ldr(dest->as_register_lo(), addr); +#ifndef AARCH64 + __ ldr(dest->as_register_hi(), frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); +#endif // !AARCH64 + } else if (dest->is_single_fpu()) { + __ ldr_float(dest->as_float_reg(), addr); + } else if (dest->is_double_fpu()) { + __ ldr_double(dest->as_double_reg(), addr); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + if (src->is_single_stack()) { + switch (src->type()) { + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + __ ldr(Rtemp, frame_map()->address_for_slot(src->single_stack_ix())); + __ str(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix())); + break; + + case T_INT: + case T_FLOAT: + __ ldr_u32(Rtemp,
frame_map()->address_for_slot(src->single_stack_ix())); + __ str_32(Rtemp, frame_map()->address_for_slot(dest->single_stack_ix())); + break; + + default: + ShouldNotReachHere(); + } + } else { + assert(src->is_double_stack(), "must be"); + __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes)); + __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes)); +#ifdef AARCH64 + assert(lo_word_offset_in_bytes == 0, "adjust this code"); +#else + __ ldr(Rtemp, frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes)); + __ str(Rtemp, frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes)); +#endif // AARCH64 + } +} + + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, + LIR_PatchCode patch_code, CodeEmitInfo* info, + bool wide, bool unaligned) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Address* addr = src->as_address_ptr(); + + Register base_reg = addr->base()->as_pointer_register(); + + PatchingStub* patch = NULL; + if (patch_code != lir_patch_none) { + patch = new PatchingStub(_masm, PatchingStub::access_field_id); +#ifdef AARCH64 + // Extra nop for MT safe patching + __ nop(); +#endif // AARCH64 + } + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + switch (type) { + case T_OBJECT: // fall through + case T_ARRAY: + if (UseCompressedOops && !wide) { + __ ldr_u32(dest->as_register(), as_Address(addr)); + } else { + __ ldr(dest->as_register(), as_Address(addr)); + } + break; + + case T_ADDRESS: + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ ldr_u32(dest->as_pointer_register(), as_Address(addr)); + } else { + __ ldr(dest->as_pointer_register(), as_Address(addr)); + } + break; + +#ifdef AARCH64 + case T_LONG: +#else + case T_INT: +#ifdef __SOFTFP__ + case T_FLOAT: +#endif // 
__SOFTFP__ +#endif // AARCH64 + __ ldr(dest->as_pointer_register(), as_Address(addr)); + break; + + case T_BOOLEAN: + __ ldrb(dest->as_register(), as_Address(addr)); + break; + + case T_BYTE: + __ ldrsb(dest->as_register(), as_Address(addr)); + break; + + case T_CHAR: + __ ldrh(dest->as_register(), as_Address(addr)); + break; + + case T_SHORT: + __ ldrsh(dest->as_register(), as_Address(addr)); + break; + +#ifdef AARCH64 + + case T_INT: + __ ldr_w(dest->as_register(), as_Address(addr)); + break; + + case T_FLOAT: + __ ldr_s(dest->as_float_reg(), as_Address(addr)); + break; + + case T_DOUBLE: + __ ldr_d(dest->as_double_reg(), as_Address(addr)); + break; + +#else // AARCH64 + +#ifdef __SOFTFP__ + case T_DOUBLE: +#endif // __SOFTFP__ + case T_LONG: { + Register to_lo = dest->as_register_lo(); + Register to_hi = dest->as_register_hi(); + if (addr->index()->is_register()) { + assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + assert(addr->disp() == 0, "Not yet supporting both"); + __ add(Rtemp, base_reg, addr->index()->as_register()); + base_reg = Rtemp; + __ ldr(to_lo, Address(Rtemp)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_low, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_high; + } + __ ldr(to_hi, Address(Rtemp, BytesPerWord)); + } else if (base_reg == to_lo) { + __ ldr(to_hi, as_Address_hi(addr)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_high, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_low; + } + __ ldr(to_lo, as_Address_lo(addr)); + } else { + __ ldr(to_lo, as_Address_lo(addr)); + if (patch != NULL) { + patching_epilog(patch, lir_patch_low, base_reg, info); + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + patch_code = lir_patch_high; + } + __ ldr(to_hi, as_Address_hi(addr)); + } + break; + } + +#ifndef __SOFTFP__ + case T_FLOAT: + if 
(addr->index()->is_register()) { + assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + __ add(Rtemp, base_reg, addr->index()->as_register()); + if ((addr->disp() <= -4096) || (addr->disp() >= 4096)) { BAILOUT("offset not in range"); } + __ flds(dest->as_float_reg(), Address(Rtemp, addr->disp())); + } else { + __ flds(dest->as_float_reg(), as_Address(addr)); + } + break; + + case T_DOUBLE: + if (addr->index()->is_register()) { + assert(addr->scale() == LIR_Address::times_1,"Unexpected scaled register"); + __ add(Rtemp, base_reg, addr->index()->as_register()); + if ((addr->disp() <= -4096) || (addr->disp() >= 4096)) { BAILOUT("offset not in range"); } + __ fldd(dest->as_double_reg(), Address(Rtemp, addr->disp())); + } else { + __ fldd(dest->as_double_reg(), as_Address(addr)); + } + break; +#endif // __SOFTFP__ + +#endif // AARCH64 + + default: + ShouldNotReachHere(); + } + + if (patch != NULL) { + // The offset embedded in the LDR/STR instruction may not be large enough + // to address a field, so reserve space for one more instruction + // that will deal with larger offsets.
+ __ nop(); + patching_epilog(patch, patch_code, base_reg, info); + } + +#ifdef AARCH64 + switch (type) { + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + __ verify_oop(dest->as_register()); + break; + + case T_ADDRESS: + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ decode_klass_not_null(dest->as_register()); + } + break; + } +#endif // AARCH64 +} + + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + bool is_32 = op->result_opr()->is_single_cpu(); + + if (op->code() == lir_idiv && op->in_opr2()->is_constant() && is_32) { + int c = op->in_opr2()->as_constant_ptr()->as_jint(); + assert(is_power_of_2(c), "non power-of-2 constant should be put in a register"); + + Register left = op->in_opr1()->as_register(); + Register dest = op->result_opr()->as_register(); + if (c == 1) { + __ mov(dest, left); + } else if (c == 2) { + __ add_32(dest, left, AsmOperand(left, lsr, 31)); + __ asr_32(dest, dest, 1); + } else if (c != (int) 0x80000000) { + int power = log2_intptr(c); + __ asr_32(Rtemp, left, 31); + __ add_32(dest, left, AsmOperand(Rtemp, lsr, 32-power)); // dest = left + (left < 0 ? 2^power - 1 : 0); + __ asr_32(dest, dest, power); // dest = dest >> power; + } else { + // x/0x80000000 is a special case, since the divisor is a power of two, but is negative. + // The only possible result values are 0 and 1, with 1 only for dividend == divisor == 0x80000000.
+ __ cmp_32(left, c); +#ifdef AARCH64 + __ cset(dest, eq); +#else + __ mov(dest, 0, ne); + __ mov(dest, 1, eq); +#endif // AARCH64 + } + } else { +#ifdef AARCH64 + Register left = op->in_opr1()->as_pointer_register(); + Register right = op->in_opr2()->as_pointer_register(); + Register dest = op->result_opr()->as_pointer_register(); + + switch (op->code()) { + case lir_idiv: + if (is_32) { + __ sdiv_w(dest, left, right); + } else { + __ sdiv(dest, left, right); + } + break; + case lir_irem: { + Register tmp = op->in_opr3()->as_pointer_register(); + assert_different_registers(left, tmp); + assert_different_registers(right, tmp); + if (is_32) { + __ sdiv_w(tmp, left, right); + __ msub_w(dest, right, tmp, left); + } else { + __ sdiv(tmp, left, right); + __ msub(dest, right, tmp, left); + } + break; + } + default: + ShouldNotReachHere(); + } +#else + assert(op->code() == lir_idiv || op->code() == lir_irem, "unexpected op3"); + __ call(StubRoutines::Arm::idiv_irem_entry(), relocInfo::runtime_call_type); + add_debug_info_for_div0_here(op->info()); +#endif // AARCH64 + } +} + + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); + assert(op->info() == NULL, "CodeEmitInfo?"); +#endif // ASSERT + +#ifdef __SOFTFP__ + assert (op->code() != lir_cond_float_branch, "this should be impossible"); +#else + if (op->code() == lir_cond_float_branch) { +#ifndef AARCH64 + __ fmstat(); +#endif // !AARCH64 + __ b(*(op->ublock()->label()), vs); + } +#endif // __SOFTFP__ + + AsmCondition acond = al; + switch (op->cond()) { + case lir_cond_equal: acond = eq; break; + case lir_cond_notEqual: acond = ne; break; + case lir_cond_less: acond = lt; break; + case lir_cond_lessEqual: acond = le; break; + case lir_cond_greaterEqual: acond = ge; break; + case 
lir_cond_greater: acond = gt; break; + case lir_cond_aboveEqual: acond = hs; break; + case lir_cond_belowEqual: acond = ls; break; + default: assert(op->cond() == lir_cond_always, "must be"); + } + __ b(*(op->label()), acond); +} + + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2l: +#ifdef AARCH64 + __ sign_extend(dest->as_register_lo(), src->as_register(), 32); +#else + move_regs(src->as_register(), dest->as_register_lo()); + __ mov(dest->as_register_hi(), AsmOperand(src->as_register(), asr, 31)); +#endif // AARCH64 + break; + case Bytecodes::_l2i: + move_regs(src->as_register_lo(), dest->as_register()); + break; + case Bytecodes::_i2b: + __ sign_extend(dest->as_register(), src->as_register(), 8); + break; + case Bytecodes::_i2s: + __ sign_extend(dest->as_register(), src->as_register(), 16); + break; + case Bytecodes::_i2c: + __ zero_extend(dest->as_register(), src->as_register(), 16); + break; + case Bytecodes::_f2d: + __ convert_f2d(dest->as_double_reg(), src->as_float_reg()); + break; + case Bytecodes::_d2f: + __ convert_d2f(dest->as_float_reg(), src->as_double_reg()); + break; + case Bytecodes::_i2f: +#ifdef AARCH64 + __ scvtf_sw(dest->as_float_reg(), src->as_register()); +#else + __ fmsr(Stemp, src->as_register()); + __ fsitos(dest->as_float_reg(), Stemp); +#endif // AARCH64 + break; + case Bytecodes::_i2d: +#ifdef AARCH64 + __ scvtf_dw(dest->as_double_reg(), src->as_register()); +#else + __ fmsr(Stemp, src->as_register()); + __ fsitod(dest->as_double_reg(), Stemp); +#endif // AARCH64 + break; + case Bytecodes::_f2i: +#ifdef AARCH64 + __ fcvtzs_ws(dest->as_register(), src->as_float_reg()); +#else + __ ftosizs(Stemp, src->as_float_reg()); + __ fmrs(dest->as_register(), Stemp); +#endif // AARCH64 + break; + case Bytecodes::_d2i: +#ifdef AARCH64 + __ fcvtzs_wd(dest->as_register(), src->as_double_reg()); +#else + __ ftosizd(Stemp, 
src->as_double_reg()); + __ fmrs(dest->as_register(), Stemp); +#endif // AARCH64 + break; +#ifdef AARCH64 + case Bytecodes::_l2f: + __ scvtf_sx(dest->as_float_reg(), src->as_register_lo()); + break; + case Bytecodes::_l2d: + __ scvtf_dx(dest->as_double_reg(), src->as_register_lo()); + break; + case Bytecodes::_f2l: + __ fcvtzs_xs(dest->as_register_lo(), src->as_float_reg()); + break; + case Bytecodes::_d2l: + __ fcvtzs_xd(dest->as_register_lo(), src->as_double_reg()); + break; +#endif // AARCH64 + default: + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + Register tmp = op->tmp1()->as_register(); + __ ldrb(tmp, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + add_debug_info_for_null_check_here(op->stub()->info()); + __ cmp(tmp, InstanceKlass::fully_initialized); + __ b(*op->stub()->entry(), ne); + } + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->tmp3()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + if (UseSlowPath || + (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || + (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { + __ b(*op->stub()->entry()); + } else { + __ allocate_array(op->obj()->as_register(), + op->len()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->tmp3()->as_register(), + arrayOopDesc::header_size(op->type()), + type2aelembytes(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias, + ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done) { + 
assert_different_registers(mdo, recv, tmp1); + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - + mdo_offset_bias); + __ ldr(tmp1, receiver_addr); + __ verify_klass_ptr(tmp1); + __ cmp(recv, tmp1); + __ b(next_test, ne); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ldr(tmp1, data_addr); + __ add(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, data_addr); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - + mdo_offset_bias); + __ ldr(tmp1, recv_addr); + __ cbnz(tmp1, next_test); + __ str(recv, recv_addr); + __ mov(tmp1, DataLayout::counter_increment); + __ str(tmp1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - + mdo_offset_bias)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) { + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for checkcast"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + if (md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes() >= 4096) { + // The offset is large so bias the mdo by the base of the slot so + // that the ldr can use an immediate offset to reference the slots of the data + mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset()); + } +} + +// On 32-bit ARM, code before this helper should test obj for null 
(ZF should be set if obj is null). +void LIR_Assembler::typecheck_profile_helper1(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias, + Register obj, Register mdo, Register data_val, Label* obj_is_null) { + assert(method != NULL, "Should have method"); + assert_different_registers(obj, mdo, data_val); + setup_md_access(method, bci, md, data, mdo_offset_bias); + Label not_null; +#ifdef AARCH64 + __ cbnz(obj, not_null); +#else + __ b(not_null, ne); +#endif // AARCH64 + __ mov_metadata(mdo, md->constant_encoding()); + if (mdo_offset_bias > 0) { + __ mov_slow(data_val, mdo_offset_bias); + __ add(mdo, mdo, data_val); + } + Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias); + __ ldrb(data_val, flags_addr); + __ orr(data_val, data_val, (uint)BitData::null_seen_byte_constant()); + __ strb(data_val, flags_addr); + __ b(*obj_is_null); + __ bind(not_null); +} + +void LIR_Assembler::typecheck_profile_helper2(ciMethodData* md, ciProfileData* data, int mdo_offset_bias, + Register mdo, Register recv, Register value, Register tmp1, + Label* profile_cast_success, Label* profile_cast_failure, + Label* success, Label* failure) { + assert_different_registers(mdo, value, tmp1); + __ bind(*profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + if (mdo_offset_bias > 0) { + __ mov_slow(tmp1, mdo_offset_bias); + __ add(mdo, mdo, tmp1); + } + __ load_klass(recv, value); + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success); + __ b(*success); + // Cast failure case + __ bind(*profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + if (mdo_offset_bias > 0) { + __ mov_slow(tmp1, mdo_offset_bias); + __ add(mdo, mdo, tmp1); + } + Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + __ ldr(tmp1, data_addr); + __ sub(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, data_addr); + __ 
b(*failure); +} + +// Sets `res` to true, if `cond` holds. On AArch64 also sets `res` to false if `cond` does not hold. +static void set_instanceof_result(MacroAssembler* _masm, Register res, AsmCondition cond) { +#ifdef AARCH64 + __ cset(res, cond); +#else + __ mov(res, 1, cond); +#endif // AARCH64 +} + + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + // TODO: ARM - can be more effective with one more register + switch (op->code()) { + case lir_store_check: { + CodeStub* stub = op->stub(); + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register klass_RInfo = op->tmp1()->as_register(); + Register k_RInfo = op->tmp2()->as_register(); + assert_different_registers(klass_RInfo, k_RInfo, Rtemp); + if (op->should_profile()) { + assert_different_registers(value, klass_RInfo, k_RInfo, Rtemp); + } + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + int mdo_offset_bias = 0; + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + Label *failure_target = op->should_profile() ? 
&profile_cast_failure : stub->entry(); + + if (op->should_profile()) { +#ifndef AARCH64 + __ cmp(value, 0); +#endif // !AARCH64 + typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, value, k_RInfo, Rtemp, &done); + } else { + __ cbz(value, done); + } + assert_different_registers(k_RInfo, value); + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + // check for immediate positive hit + __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); + __ cmp(klass_RInfo, k_RInfo); + __ cond_cmp(Rtemp, k_RInfo, ne); + __ b(*success_target, eq); + // check for immediate negative hit + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset())); + __ b(*failure_target, ne); + // slow case + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ cbz(R0, *failure_target); + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp; + if (mdo == value) { + mdo = k_RInfo; + recv = klass_RInfo; + } + typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, value, tmp1, + &profile_cast_success, &profile_cast_failure, + &done, stub->entry()); + } + __ bind(done); + break; + } + + case lir_checkcast: { + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register res = op->result_opr()->as_register(); + Register klass_RInfo = op->tmp1()->as_register(); + Register k_RInfo = op->tmp2()->as_register(); + ciKlass* k = op->klass(); + assert_different_registers(res, k_RInfo, klass_RInfo, Rtemp); + + // TODO: ARM - Late binding is used to prevent confusion of register allocator + 
assert(stub->is_exception_throw_stub(), "must be"); + ((SimpleExceptionStub*)stub)->set_obj(op->result_opr()); + + ciMethodData* md; + ciProfileData* data; + int mdo_offset_bias = 0; + + Label done; + + Label profile_cast_failure, profile_cast_success; + Label *failure_target = op->should_profile() ? &profile_cast_failure : op->stub()->entry(); + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + +#ifdef AARCH64 + move_regs(obj, res); + if (op->should_profile()) { + typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done); + } else { + __ cbz(obj, done); + } + if (k->is_loaded()) { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } else { + if (res != obj) { + op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res)); + } + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } + __ load_klass(klass_RInfo, res); + + if (op->fast_check()) { + __ cmp(klass_RInfo, k_RInfo); + __ b(*failure_target, ne); + } else if (k->is_loaded()) { + __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset())); + if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) { + __ cmp(Rtemp, k_RInfo); + __ b(*failure_target, ne); + } else { + __ cmp(klass_RInfo, k_RInfo); + __ cond_cmp(Rtemp, k_RInfo, ne); + __ b(*success_target, eq); + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ cbz(R0, *failure_target); + } + } else { + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + // check for immediate positive hit + __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); + __ cmp(klass_RInfo, k_RInfo); + __ cond_cmp(Rtemp, k_RInfo, ne); + __ b(*success_target, eq); + // check for immediate negative hit + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + __ cmp(Rtemp, 
in_bytes(Klass::secondary_super_cache_offset())); + __ b(*failure_target, ne); + // slow case + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ cbz(R0, *failure_target); + } + +#else // AARCH64 + + __ movs(res, obj); + if (op->should_profile()) { + typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done); + } else { + __ b(done, eq); + } + if (k->is_loaded()) { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } else if (k_RInfo != obj) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + __ movs(res, obj); + } else { + // Patching doesn't update "res" register after GC, so do patching first + klass2reg_with_patching(Rtemp, op->info_for_patch()); + __ movs(res, obj); + __ mov(k_RInfo, Rtemp); + } + __ load_klass(klass_RInfo, res, ne); + + if (op->fast_check()) { + __ cmp(klass_RInfo, k_RInfo, ne); + __ b(*failure_target, ne); + } else if (k->is_loaded()) { + __ b(*success_target, eq); + __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset())); + if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) { + __ cmp(Rtemp, k_RInfo); + __ b(*failure_target, ne); + } else { + __ cmp(klass_RInfo, k_RInfo); + __ cmp(Rtemp, k_RInfo, ne); + __ b(*success_target, eq); + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ cbz(R0, *failure_target); + } + } else { + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + __ b(*success_target, eq); + // check for immediate positive hit + __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); + __ cmp(klass_RInfo, k_RInfo); + __ cmp(Rtemp, k_RInfo, ne); + __ b(*success_target, eq); + // check for immediate negative hit + __ ldr_u32(Rtemp, Address(k_RInfo, 
Klass::super_check_offset_offset())); + __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset())); + __ b(*failure_target, ne); + // slow case + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ cbz(R0, *failure_target); + } +#endif // AARCH64 + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp; + typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, res, tmp1, + &profile_cast_success, &profile_cast_failure, + &done, stub->entry()); + } + __ bind(done); + break; + } + + case lir_instanceof: { + Register obj = op->object()->as_register(); + Register res = op->result_opr()->as_register(); + Register klass_RInfo = op->tmp1()->as_register(); + Register k_RInfo = op->tmp2()->as_register(); + ciKlass* k = op->klass(); + assert_different_registers(res, klass_RInfo, k_RInfo, Rtemp); + + ciMethodData* md; + ciProfileData* data; + int mdo_offset_bias = 0; + + Label done; + + Label profile_cast_failure, profile_cast_success; + Label *failure_target = op->should_profile() ? &profile_cast_failure : &done; + Label *success_target = op->should_profile() ? 
&profile_cast_success : &done; + +#ifdef AARCH64 + move_regs(obj, res); +#else + __ movs(res, obj); +#endif // AARCH64 + + if (op->should_profile()) { + typecheck_profile_helper1(op->profiled_method(), op->profiled_bci(), md, data, mdo_offset_bias, res, klass_RInfo, Rtemp, &done); + } else { +#ifdef AARCH64 + __ cbz(obj, done); // If obj == NULL, res is false +#else + __ b(done, eq); +#endif // AARCH64 + } + + if (k->is_loaded()) { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } else { + op->info_for_patch()->add_register_oop(FrameMap::as_oop_opr(res)); + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } + __ load_klass(klass_RInfo, res); + +#ifndef AARCH64 + if (!op->should_profile()) { + __ mov(res, 0); + } +#endif // !AARCH64 + + if (op->fast_check()) { + __ cmp(klass_RInfo, k_RInfo); + if (!op->should_profile()) { + set_instanceof_result(_masm, res, eq); + } else { + __ b(profile_cast_failure, ne); + } + } else if (k->is_loaded()) { + __ ldr(Rtemp, Address(klass_RInfo, k->super_check_offset())); + if (in_bytes(Klass::secondary_super_cache_offset()) != (int) k->super_check_offset()) { + __ cmp(Rtemp, k_RInfo); + if (!op->should_profile()) { + set_instanceof_result(_masm, res, eq); + } else { + __ b(profile_cast_failure, ne); + } + } else { + __ cmp(klass_RInfo, k_RInfo); + __ cond_cmp(Rtemp, k_RInfo, ne); + if (!op->should_profile()) { + set_instanceof_result(_masm, res, eq); + } + __ b(*success_target, eq); + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + if (!op->should_profile()) { + move_regs(R0, res); + } else { + __ cbz(R0, *failure_target); + } + } + } else { + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + // check for immediate positive hit + __ cmp(klass_RInfo, k_RInfo); + if (!op->should_profile()) { +#ifdef AARCH64 + // TODO-AARCH64 check if separate conditional branch is more efficient than 
ldr+cond_cmp + __ ldr(res, Address(klass_RInfo, Rtemp)); +#else + __ ldr(res, Address(klass_RInfo, Rtemp), ne); +#endif // AARCH64 + __ cond_cmp(res, k_RInfo, ne); + set_instanceof_result(_masm, res, eq); + } else { +#ifdef AARCH64 + // TODO-AARCH64 check if separate conditional branch is more efficient than ldr+cond_cmp + __ ldr(Rtemp, Address(klass_RInfo, Rtemp)); +#else + __ ldr(Rtemp, Address(klass_RInfo, Rtemp), ne); +#endif // AARCH64 + __ cond_cmp(Rtemp, k_RInfo, ne); + } + __ b(*success_target, eq); + // check for immediate negative hit + if (op->should_profile()) { + __ ldr_u32(Rtemp, Address(k_RInfo, Klass::super_check_offset_offset())); + } + __ cmp(Rtemp, in_bytes(Klass::secondary_super_cache_offset())); + if (!op->should_profile()) { +#ifdef AARCH64 + __ mov(res, 0); +#else + __ mov(res, 0, ne); +#endif // AARCH64 + } + __ b(*failure_target, ne); + // slow case + assert(klass_RInfo == R0 && k_RInfo == R1, "runtime call setup"); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + if (!op->should_profile()) { + move_regs(R0, res); + } + if (op->should_profile()) { + __ cbz(R0, *failure_target); + } + } + + if (op->should_profile()) { + Label done_ok, done_failure; + Register mdo = klass_RInfo, recv = k_RInfo, tmp1 = Rtemp; + typecheck_profile_helper2(md, data, mdo_offset_bias, mdo, recv, res, tmp1, + &profile_cast_success, &profile_cast_failure, + &done_ok, &done_failure); + __ bind(done_failure); + __ mov(res, 0); + __ b(done); + __ bind(done_ok); + __ mov(res, 1); + } + __ bind(done); + break; + } + default: + ShouldNotReachHere(); + } +} + +/* Emits an atomic compare-and-swap (semantics in the pseudo-code below). AArch64 uses an ldaxr/stlxr retry loop; 32-bit ARM emits a leading membar and calls atomic_cas_bool (int/oop) or atomic_cas64 (long). Both paths end with a trailing membar. */ +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + // if (*addr == cmpval) { + // *addr = newval; + // dest = 1; + // } else { + // dest = 0; + // } +#ifdef AARCH64 + Label retry, done; + Register addr = op->addr()->as_pointer_register(); + Register cmpval = op->cmp_value()->as_pointer_register(); + Register newval = 
op->new_value()->as_pointer_register(); + Register dest = op->result_opr()->as_pointer_register(); + assert_different_registers(dest, addr, cmpval, newval, Rtemp); + + if (UseCompressedOops && op->code() == lir_cas_obj) { + Register tmp1 = op->tmp1()->as_pointer_register(); + Register tmp2 = op->tmp2()->as_pointer_register(); + assert_different_registers(dest, addr, cmpval, newval, tmp1, tmp2, Rtemp); + __ encode_heap_oop(tmp1, cmpval); cmpval = tmp1; + __ encode_heap_oop(tmp2, newval); newval = tmp2; + } + + __ mov(dest, ZR); + __ bind(retry); + if (((op->code() == lir_cas_obj) && !UseCompressedOops) || op->code() == lir_cas_long) { + __ ldaxr(Rtemp, addr); + __ cmp(Rtemp, cmpval); + __ b(done, ne); + __ stlxr(Rtemp, newval, addr); + } else if (((op->code() == lir_cas_obj) && UseCompressedOops) || op->code() == lir_cas_int) { + __ ldaxr_w(Rtemp, addr); + __ cmp_w(Rtemp, cmpval); + __ b(done, ne); + __ stlxr_w(Rtemp, newval, addr); + } else { + ShouldNotReachHere(); + } + __ cbnz_w(Rtemp, retry); + __ mov(dest, 1); + __ bind(done); +#else + // FIXME: membar_release + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); + if (op->code() == lir_cas_int || op->code() == lir_cas_obj) { + Register addr = op->addr()->as_register(); + Register cmpval = op->cmp_value()->as_register(); + Register newval = op->new_value()->as_register(); + Register dest = op->result_opr()->as_register(); + assert_different_registers(dest, addr, cmpval, newval, Rtemp); + + __ atomic_cas_bool(cmpval, newval, addr, 0, Rtemp); // Rtemp free by default at C1 LIR layer + __ mov(dest, 1, eq); + __ mov(dest, 0, ne); + } else if (op->code() == lir_cas_long) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr = op->addr()->as_pointer_register(); + Register cmp_value_lo = op->cmp_value()->as_register_lo(); + Register cmp_value_hi = op->cmp_value()->as_register_hi(); + Register new_value_lo = op->new_value()->as_register_lo(); + 
Register new_value_hi = op->new_value()->as_register_hi(); + Register dest = op->result_opr()->as_register(); + Register tmp_lo = op->tmp1()->as_register_lo(); + Register tmp_hi = op->tmp1()->as_register_hi(); + + assert_different_registers(tmp_lo, tmp_hi, cmp_value_lo, cmp_value_hi, dest, new_value_lo, new_value_hi, addr); + assert(tmp_hi->encoding() == tmp_lo->encoding() + 1, "non aligned register pair"); + assert(new_value_hi->encoding() == new_value_lo->encoding() + 1, "non aligned register pair"); + assert((tmp_lo->encoding() & 0x1) == 0, "misaligned register pair"); + assert((new_value_lo->encoding() & 0x1) == 0, "misaligned register pair"); + __ atomic_cas64(tmp_lo, tmp_hi, dest, cmp_value_lo, cmp_value_hi, + new_value_lo, new_value_hi, addr, 0); + } else { + Unimplemented(); + } +#endif // AARCH64 + // FIXME: is full membar really needed instead of just membar_acquire? + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); +} + +/* Conditional move: result receives opr1 if 'condition' holds, otherwise opr2. 32-bit ARM uses conditionally executed moves/loads; AArch64 moves opr1 and then branches over the move of opr2. */ +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { + AsmCondition acond = al; + AsmCondition ncond = nv; // negation of acond, applied to opr2; both stay al/nv when opr1 == opr2 + if (opr1 != opr2) { + switch (condition) { + case lir_cond_equal: acond = eq; ncond = ne; break; + case lir_cond_notEqual: acond = ne; ncond = eq; break; + case lir_cond_less: acond = lt; ncond = ge; break; + case lir_cond_lessEqual: acond = le; ncond = gt; break; + case lir_cond_greaterEqual: acond = ge; ncond = lt; break; + case lir_cond_greater: acond = gt; ncond = le; break; + case lir_cond_aboveEqual: acond = hs; ncond = lo; break; + case lir_cond_belowEqual: acond = ls; ncond = hi; break; + default: ShouldNotReachHere(); + } + } + +#ifdef AARCH64 + + // TODO-AARCH64 implement it more efficiently + + if (opr1->is_register()) { + reg2reg(opr1, result); + } else if (opr1->is_stack()) { + stack2reg(opr1, result, result->type()); + } else if (opr1->is_constant()) { + const2reg(opr1, result, lir_patch_none, 
NULL); + } else { + ShouldNotReachHere(); + } + + Label skip; + __ b(skip, acond); + + if (opr2->is_register()) { + reg2reg(opr2, result); + } else if (opr2->is_stack()) { + stack2reg(opr2, result, result->type()); + } else if (opr2->is_constant()) { + const2reg(opr2, result, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } + + __ bind(skip); + +#else + for (;;) { // two iterations only + if (opr1 == result) { + // do nothing + } else if (opr1->is_single_cpu()) { + __ mov(result->as_register(), opr1->as_register(), acond); + } else if (opr1->is_double_cpu()) { + __ long_move(result->as_register_lo(), result->as_register_hi(), + opr1->as_register_lo(), opr1->as_register_hi(), acond); + } else if (opr1->is_single_stack()) { + __ ldr(result->as_register(), frame_map()->address_for_slot(opr1->single_stack_ix()), acond); + } else if (opr1->is_double_stack()) { + __ ldr(result->as_register_lo(), + frame_map()->address_for_slot(opr1->double_stack_ix(), lo_word_offset_in_bytes), acond); + __ ldr(result->as_register_hi(), + frame_map()->address_for_slot(opr1->double_stack_ix(), hi_word_offset_in_bytes), acond); + } else if (opr1->is_illegal()) { + // do nothing: this part of the cmove has been optimized away in the peephole optimizer + } else { + assert(opr1->is_constant(), "must be"); + LIR_Const* c = opr1->as_constant_ptr(); + + switch (c->type()) { + case T_INT: + __ mov_slow(result->as_register(), c->as_jint(), acond); + break; + case T_LONG: + __ mov_slow(result->as_register_lo(), c->as_jint_lo(), acond); + __ mov_slow(result->as_register_hi(), c->as_jint_hi(), acond); + break; + case T_OBJECT: + __ mov_oop(result->as_register(), c->as_jobject(), 0, acond); + break; + case T_FLOAT: +#ifdef __SOFTFP__ + // not generated now. + __ mov_slow(result->as_register(), c->as_jint(), acond); +#else + __ mov_float(result->as_float_reg(), c->as_jfloat(), acond); +#endif // __SOFTFP__ + break; + case T_DOUBLE: +#ifdef __SOFTFP__ + // not generated now. 
+ __ mov_slow(result->as_register_lo(), c->as_jint_lo(), acond); + __ mov_slow(result->as_register_hi(), c->as_jint_hi(), acond); +#else + __ mov_double(result->as_double_reg(), c->as_jdouble(), acond); +#endif // __SOFTFP__ + break; + default: + ShouldNotReachHere(); + } + } + + // Negate the condition and repeat the algorithm with the second operand + if (opr1 == opr2) { break; } + opr1 = opr2; + acond = ncond; + } +#endif // AARCH64 +} +/* Returns the size in bytes implied by op->type(): BytesPerInt for T_FLOAT/T_INT, BytesPerLong for T_LONG/T_DOUBLE, BytesPerWord for T_OBJECT/T_ARRAY/T_METADATA. Any other type hits ShouldNotReachHere(). Compiled only for AArch64 or ASSERT builds. */ +#if defined(AARCH64) || defined(ASSERT) +static int reg_size(LIR_Opr op) { + switch (op->type()) { + case T_FLOAT: + case T_INT: return BytesPerInt; + case T_LONG: + case T_DOUBLE: return BytesPerLong; + case T_OBJECT: + case T_ARRAY: + case T_METADATA: return BytesPerWord; + case T_ADDRESS: + case T_ILLEGAL: // fall through + default: ShouldNotReachHere(); return -1; + } +} +#endif +/* Emits the arithmetic LIR operation 'code' (add/sub/mul, plus div for FPU operands) as dest = left op right, dispatching on the kind of dest. An address-typed right operand encodes add/sub of a shifted (on AArch64 possibly sign-extended) index register. info must be NULL. */ +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); + assert(dest->is_register(), "wrong items state"); + + if (right->is_address()) { + // special case for adding shifted/extended register + const Register res = dest->as_pointer_register(); + const Register lreg = left->as_pointer_register(); + const LIR_Address* addr = right->as_address_ptr(); + + assert(addr->base()->as_pointer_register() == lreg && addr->index()->is_register() && addr->disp() == 0, "must be"); + + int scale = addr->scale(); + AsmShift shift = lsl; + +#ifdef AARCH64 + bool is_index_extended = reg_size(addr->base()) > reg_size(addr->index()); + if (scale < 0) { + scale = -scale; + shift = lsr; + } + assert(shift == lsl || !is_index_extended, "could not have extend and right shift in one operand"); + assert(0 <= scale && scale <= 63, "scale is too large"); + + if (is_index_extended) { + assert(scale <= 4, "scale is too large for add with extended register"); + assert(addr->index()->is_single_cpu(), "should be"); + assert(addr->index()->type() == T_INT, 
"should be"); + assert(dest->is_double_cpu(), "should be"); + assert(code == lir_add, "special case of add with extended register"); + + __ add(res, lreg, addr->index()->as_register(), ex_sxtw, scale); + return; + } else if (reg_size(dest) == BytesPerInt) { + assert(reg_size(addr->base()) == reg_size(addr->index()), "should be"); + assert(reg_size(addr->base()) == reg_size(dest), "should be"); + + AsmOperand operand(addr->index()->as_pointer_register(), shift, scale); + switch (code) { + case lir_add: __ add_32(res, lreg, operand); break; + case lir_sub: __ sub_32(res, lreg, operand); break; + default: ShouldNotReachHere(); + } + return; + } +#endif // AARCH64 + + assert(reg_size(addr->base()) == reg_size(addr->index()), "should be"); + assert(reg_size(addr->base()) == reg_size(dest), "should be"); + assert(reg_size(dest) == wordSize, "should be"); + + AsmOperand operand(addr->index()->as_pointer_register(), shift, scale); + switch (code) { + case lir_add: __ add(res, lreg, operand); break; + case lir_sub: __ sub(res, lreg, operand); break; + default: ShouldNotReachHere(); + } + +#ifndef AARCH64 + } else if (left->is_address()) { + assert(code == lir_sub && right->is_single_cpu(), "special case used by strength_reduce_multiply()"); + const LIR_Address* addr = left->as_address_ptr(); + const Register res = dest->as_register(); + const Register rreg = right->as_register(); + assert(addr->base()->as_register() == rreg && addr->index()->is_register() && addr->disp() == 0, "must be"); + __ rsb(res, rreg, AsmOperand(addr->index()->as_register(), lsl, addr->scale())); +#endif // !AARCH64 + + } else if (dest->is_single_cpu()) { + assert(left->is_single_cpu(), "unexpected left operand"); +#ifdef AARCH64 + assert(dest->type() == T_INT, "unexpected dest type"); + assert(left->type() == T_INT, "unexpected left type"); + assert(right->type() == T_INT, "unexpected right type"); +#endif // AARCH64 + + const Register res = dest->as_register(); + const Register lreg = 
left->as_register(); + + if (right->is_single_cpu()) { + const Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add_32(res, lreg, rreg); break; + case lir_sub: __ sub_32(res, lreg, rreg); break; + case lir_mul: __ mul_32(res, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else { + assert(right->is_constant(), "must be"); + const jint c = right->as_constant_ptr()->as_jint(); + if (!Assembler::is_arith_imm_in_range(c)) { + BAILOUT("illegal arithmetic operand"); + } + switch (code) { + case lir_add: __ add_32(res, lreg, c); break; + case lir_sub: __ sub_32(res, lreg, c); break; + default: ShouldNotReachHere(); + } + } + + } else if (dest->is_double_cpu()) { +#ifdef AARCH64 + assert(left->is_double_cpu() || + (left->is_single_cpu() && ((left->type() == T_OBJECT) || (left->type() == T_ARRAY) || (left->type() == T_ADDRESS))), + "unexpected left operand"); + + const Register res = dest->as_register_lo(); + const Register lreg = left->as_pointer_register(); + + if (right->is_constant()) { + assert(right->type() == T_LONG, "unexpected right type"); + assert((right->as_constant_ptr()->as_jlong() >> 24) == 0, "out of range"); + jint imm = (jint)right->as_constant_ptr()->as_jlong(); + switch (code) { + case lir_add: __ add(res, lreg, imm); break; + case lir_sub: __ sub(res, lreg, imm); break; + default: ShouldNotReachHere(); + } + } else { + assert(right->is_double_cpu() || + (right->is_single_cpu() && ((right->type() == T_OBJECT) || (right->type() == T_ARRAY) || (right->type() == T_ADDRESS))), + "unexpected right operand"); + const Register rreg = right->as_pointer_register(); + switch (code) { + case lir_add: __ add(res, lreg, rreg); break; + case lir_sub: __ sub(res, lreg, rreg); break; + case lir_mul: __ mul(res, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } +#else // AARCH64 + Register res_lo = dest->as_register_lo(); + Register res_hi = dest->as_register_hi(); + Register lreg_lo = left->as_register_lo(); + Register 
lreg_hi = left->as_register_hi(); + if (right->is_double_cpu()) { + Register rreg_lo = right->as_register_lo(); + Register rreg_hi = right->as_register_hi(); + if (res_lo == lreg_hi || res_lo == rreg_hi) { + res_lo = Rtemp; + } + switch (code) { + case lir_add: + __ adds(res_lo, lreg_lo, rreg_lo); + __ adc(res_hi, lreg_hi, rreg_hi); + break; + case lir_sub: + __ subs(res_lo, lreg_lo, rreg_lo); + __ sbc(res_hi, lreg_hi, rreg_hi); + break; + default: + ShouldNotReachHere(); + } + } else { + assert(right->is_constant(), "must be"); + assert((right->as_constant_ptr()->as_jlong() >> 32) == 0, "out of range"); + const jint c = (jint) right->as_constant_ptr()->as_jlong(); + if (res_lo == lreg_hi) { + res_lo = Rtemp; + } + switch (code) { + case lir_add: + __ adds(res_lo, lreg_lo, c); + __ adc(res_hi, lreg_hi, 0); + break; + case lir_sub: + __ subs(res_lo, lreg_lo, c); + __ sbc(res_hi, lreg_hi, 0); + break; + default: + ShouldNotReachHere(); + } + } + move_regs(res_lo, dest->as_register_lo()); +#endif // AARCH64 + + } else if (dest->is_single_fpu()) { + assert(left->is_single_fpu(), "must be"); + assert(right->is_single_fpu(), "must be"); + const FloatRegister res = dest->as_float_reg(); + const FloatRegister lreg = left->as_float_reg(); + const FloatRegister rreg = right->as_float_reg(); + switch (code) { + case lir_add: __ add_float(res, lreg, rreg); break; + case lir_sub: __ sub_float(res, lreg, rreg); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ mul_float(res, lreg, rreg); break; + case lir_div_strictfp: // fall through + case lir_div: __ div_float(res, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (dest->is_double_fpu()) { + assert(left->is_double_fpu(), "must be"); + assert(right->is_double_fpu(), "must be"); + const FloatRegister res = dest->as_double_reg(); + const FloatRegister lreg = left->as_double_reg(); + const FloatRegister rreg = right->as_double_reg(); + switch (code) { + case lir_add: __ add_double(res, lreg, 
rreg); break; + case lir_sub: __ sub_double(res, lreg, rreg); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ mul_double(res, lreg, rreg); break; + case lir_div_strictfp: // fall through + case lir_div: __ div_double(res, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +/* Emits the math intrinsics lir_abs and lir_sqrt: both read value and write dest as double registers. Any other code is unexpected. */ +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch (code) { + case lir_abs: + __ abs_double(dest->as_double_reg(), value->as_double_reg()); + break; + case lir_sqrt: + __ sqrt_double(dest->as_double_reg(), value->as_double_reg()); + break; + default: + ShouldNotReachHere(); + } +} + +/* Emits dest = left and/or/xor right. Integer operands use the *_32 forms with a register or constant right operand; otherwise dest must be a double_cpu operand (a lo/hi register pair on 32-bit ARM). 64-bit constants that cannot be encoded cause a bailout. */ +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + assert(dest->is_register(), "wrong items state"); + assert(left->is_register(), "wrong items state"); + + if (dest->is_single_cpu()) { +#ifdef AARCH64 + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + assert (right->type() == T_INT, "unexpected right type"); +#endif // AARCH64 + + const Register res = dest->as_register(); + const Register lreg = left->as_register(); + + if (right->is_single_cpu()) { + const Register rreg = right->as_register(); + switch (code) { + case lir_logic_and: __ and_32(res, lreg, rreg); break; + case lir_logic_or: __ orr_32(res, lreg, rreg); break; + case lir_logic_xor: __ eor_32(res, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else { + assert(right->is_constant(), "must be"); + const uint c = (uint)right->as_constant_ptr()->as_jint(); + switch (code) { + case lir_logic_and: __ and_32(res, lreg, c); break; + case lir_logic_or: __ orr_32(res, lreg, c); break; + case lir_logic_xor: __ eor_32(res, lreg, c); break; + default: ShouldNotReachHere(); + } + } + } else { + assert(dest->is_double_cpu(), "should be"); + Register res_lo = dest->as_register_lo(); + +#ifdef AARCH64 + assert 
((left->is_single_cpu() && left->is_oop_register()) || left->is_double_cpu(), "should be"); + const Register lreg_lo = left->as_pointer_register(); +#else + assert (dest->type() == T_LONG, "unexpected result type"); + assert (left->type() == T_LONG, "unexpected left type"); + assert (right->type() == T_LONG, "unexpected right type"); + + const Register res_hi = dest->as_register_hi(); + const Register lreg_lo = left->as_register_lo(); + const Register lreg_hi = left->as_register_hi(); +#endif // AARCH64 + + if (right->is_register()) { +#ifdef AARCH64 + assert ((right->is_single_cpu() && right->is_oop_register()) || right->is_double_cpu(), "should be"); + const Register rreg_lo = right->as_pointer_register(); + switch (code) { + case lir_logic_and: __ andr(res_lo, lreg_lo, rreg_lo); break; + case lir_logic_or: __ orr (res_lo, lreg_lo, rreg_lo); break; + case lir_logic_xor: __ eor (res_lo, lreg_lo, rreg_lo); break; + default: ShouldNotReachHere(); + } +#else + const Register rreg_lo = right->as_register_lo(); + const Register rreg_hi = right->as_register_hi(); + if (res_lo == lreg_hi || res_lo == rreg_hi) { + res_lo = Rtemp; // Temp register helps to avoid overlap between result and input + } + switch (code) { + case lir_logic_and: + __ andr(res_lo, lreg_lo, rreg_lo); + __ andr(res_hi, lreg_hi, rreg_hi); + break; + case lir_logic_or: + __ orr(res_lo, lreg_lo, rreg_lo); + __ orr(res_hi, lreg_hi, rreg_hi); + break; + case lir_logic_xor: + __ eor(res_lo, lreg_lo, rreg_lo); + __ eor(res_hi, lreg_hi, rreg_hi); + break; + default: + ShouldNotReachHere(); + } + move_regs(res_lo, dest->as_register_lo()); +#endif // AARCH64 + } else { + assert(right->is_constant(), "must be"); +#ifdef AARCH64 + const julong c = (julong)right->as_constant_ptr()->as_jlong(); + Assembler::LogicalImmediate imm(c, false); + if (imm.is_encoded()) { + switch (code) { + case lir_logic_and: __ andr(res_lo, lreg_lo, imm); break; + case lir_logic_or: __ orr (res_lo, lreg_lo, imm); break; + case 
lir_logic_xor: __ eor (res_lo, lreg_lo, imm); break; + default: ShouldNotReachHere(); + } + } else { + BAILOUT("64 bit constant cannot be inlined"); + } +#else + const jint c_lo = (jint) right->as_constant_ptr()->as_jlong(); + const jint c_hi = (jint) (right->as_constant_ptr()->as_jlong() >> 32); + // Case for logic_or from do_ClassIDIntrinsic() + if (c_hi == 0 && AsmOperand::is_rotated_imm(c_lo)) { + switch (code) { + case lir_logic_and: + __ andr(res_lo, lreg_lo, c_lo); + __ mov(res_hi, 0); + break; + case lir_logic_or: // NOTE(review): or/xor leave res_hi unwritten, unlike the c_hi == -1 case below which copies lreg_hi; presumably dest and left share the high register here -- confirm + __ orr(res_lo, lreg_lo, c_lo); + break; + case lir_logic_xor: + __ eor(res_lo, lreg_lo, c_lo); + break; + default: + ShouldNotReachHere(); + } + } else if (code == lir_logic_and && + c_hi == -1 && + (AsmOperand::is_rotated_imm(c_lo) || + AsmOperand::is_rotated_imm(~c_lo))) { + // Another case which handles logic_and from do_ClassIDIntrinsic() + if (AsmOperand::is_rotated_imm(c_lo)) { + __ andr(res_lo, lreg_lo, c_lo); + } else { + __ bic(res_lo, lreg_lo, ~c_lo); + } + if (res_hi != lreg_hi) { + __ mov(res_hi, lreg_hi); + } + } else { + BAILOUT("64 bit constant cannot be inlined"); + } +#endif // AARCH64 + } + } +} + + +#ifdef AARCH64 +/* AArch64 only: emits the compare of the long in opr1 with opr2, which is either another double_cpu register or a T_LONG constant that must fit a signed 32-bit immediate. Negative constants are compared with cmn against the negated value. */ +void LIR_Assembler::long_compare_helper(LIR_Opr opr1, LIR_Opr opr2) { + assert(opr1->is_double_cpu(), "should be"); + Register x = opr1->as_register_lo(); + + if (opr2->is_double_cpu()) { + Register y = opr2->as_register_lo(); + __ cmp(x, y); + + } else { + assert(opr2->is_constant(), "should be"); + assert(opr2->as_constant_ptr()->type() == T_LONG, "long constant expected"); + jlong c = opr2->as_jlong(); + assert(((c >> 31) == 0) || ((c >> 31) == -1), "immediate is out of range"); + if (c >= 0) { + __ cmp(x, (jint)c); + } else { + __ cmn(x, (jint)(-c)); + } + } +} + +#endif // AARCH64 +/* Emits the comparison of opr1 with opr2, dispatching on opr1's kind: single CPU register, long, float or double. Floating-point constants must be zero. NOTE(review): on 32-bit ARM the ordered long compare uses subs/sbcs with xlo/xhi as destination, which appears to overwrite opr1 -- confirm callers expect that. */ +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + if (opr1->is_single_cpu()) { + if (opr2->is_constant()) { + switch (opr2->as_constant_ptr()->type()) { + case T_INT: { + const jint 
c = opr2->as_constant_ptr()->as_jint(); + if (Assembler::is_arith_imm_in_range(c)) { + __ cmp_32(opr1->as_register(), c); + } else if (Assembler::is_arith_imm_in_range(-c)) { + __ cmn_32(opr1->as_register(), -c); + } else { + // This can happen when compiling lookupswitch + __ mov_slow(Rtemp, c); + __ cmp_32(opr1->as_register(), Rtemp); + } + break; + } + case T_OBJECT: + assert(opr2->as_constant_ptr()->as_jobject() == NULL, "cannot handle otherwise"); + __ cmp(opr1->as_register(), 0); + break; + default: + ShouldNotReachHere(); + } + } else if (opr2->is_single_cpu()) { + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_METADATA || opr1->type() == T_ADDRESS) { + assert(opr2->type() == T_OBJECT || opr2->type() == T_ARRAY || opr2->type() == T_METADATA || opr2->type() == T_ADDRESS, "incompatibe type"); + __ cmp(opr1->as_register(), opr2->as_register()); + } else { + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_METADATA && opr2->type() != T_ADDRESS, "incompatibe type"); + __ cmp_32(opr1->as_register(), opr2->as_register()); + } + } else { + ShouldNotReachHere(); + } + } else if (opr1->is_double_cpu()) { +#ifdef AARCH64 + long_compare_helper(opr1, opr2); +#else + Register xlo = opr1->as_register_lo(); + Register xhi = opr1->as_register_hi(); + if (opr2->is_constant() && opr2->as_jlong() == 0) { + assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "cannot handle otherwise"); + __ orrs(Rtemp, xlo, xhi); + } else if (opr2->is_register()) { + Register ylo = opr2->as_register_lo(); + Register yhi = opr2->as_register_hi(); + if (condition == lir_cond_equal || condition == lir_cond_notEqual) { + __ teq(xhi, yhi); + __ teq(xlo, ylo, eq); + } else { + __ subs(xlo, xlo, ylo); + __ sbcs(xhi, xhi, yhi); + } + } else { + ShouldNotReachHere(); + } +#endif // AARCH64 + } else if (opr1->is_single_fpu()) { + if (opr2->is_constant()) { + assert(opr2->as_jfloat() == 0.0f, "cannot handle otherwise"); + __ 
cmp_zero_float(opr1->as_float_reg()); + } else { + __ cmp_float(opr1->as_float_reg(), opr2->as_float_reg()); + } + } else if (opr1->is_double_fpu()) { + if (opr2->is_constant()) { + assert(opr2->as_jdouble() == 0.0, "cannot handle otherwise"); + __ cmp_zero_double(opr1->as_double_reg()); + } else { + __ cmp_double(opr1->as_double_reg(), opr2->as_double_reg()); + } + } else { + ShouldNotReachHere(); + } +} +/* Materializes a three-way comparison into dst: 1 if left > right, 0 if equal, -1 if left < right. For lir_ucmp_fd2i an unordered floating-point compare yields -1, for lir_cmp_fd2i it yields 1; lir_cmp_l2i compares longs. */ +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) { + const Register res = dst->as_register(); + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + comp_op(lir_cond_unknown, left, right, op); +#ifdef AARCH64 + if (code == lir_ucmp_fd2i) { // unordered is less + __ cset(res, gt); // 1 if '>', else 0 + __ csinv(res, res, ZR, ge); // previous value if '>=', else -1 + } else { + __ cset(res, hi); // 1 if '>' or unordered, else 0 + __ csinv(res, res, ZR, pl); // previous value if '>=' or unordered, else -1 + } +#else + __ fmstat(); + if (code == lir_ucmp_fd2i) { // unordered is less + __ mvn(res, 0, lt); + __ mov(res, 1, ge); + } else { // unordered is greater + __ mov(res, 1, cs); + __ mvn(res, 0, cc); + } + __ mov(res, 0, eq); // equality overrides the value chosen above +#endif // AARCH64 + + } else { + assert(code == lir_cmp_l2i, "must be"); + +#ifdef AARCH64 + long_compare_helper(left, right); + + __ cset(res, gt); // 1 if '>', else 0 + __ csinv(res, res, ZR, ge); // previous value if '>=', else -1 +#else + Label done; + const Register xlo = left->as_register_lo(); + const Register xhi = left->as_register_hi(); + const Register ylo = right->as_register_lo(); + const Register yhi = right->as_register_hi(); + __ cmp(xhi, yhi); + __ mov(res, 1, gt); + __ mvn(res, 0, lt); + __ b(done, ne); // high words differ: res is final; otherwise decide on the low words with unsigned conditions (hi/lo) + __ subs(res, xlo, ylo); + __ mov(res, 1, hi); + __ mvn(res, 0, lo); + __ bind(done); +#endif // AARCH64 + } +} + +/* Call sites need no alignment here: emits nothing. */ +void LIR_Assembler::align_call(LIR_Code code) { + // Not needed +} + +/* Emits a patchable call to op->addr() with relocation type rtype and records op->info() at the current code offset, which is asserted to equal the call's return address offset. */ +void LIR_Assembler::call(LIR_OpJavaCall *op, relocInfo::relocType rtype) { + int 
ret_addr_offset = __ patchable_call(op->addr(), rtype); + assert(ret_addr_offset == __ offset(), "embedded return address not allowed"); + add_call_info_here(op->info()); +} + +/* Emits an inline-cache call: Ricklass is preloaded with Universe::non_oop_word() (via movw/movt or an inlined literal), a virtual_call relocation is attached, and control transfers to op->addr(). */ +void LIR_Assembler::ic_call(LIR_OpJavaCall *op) { + bool near_range = __ cache_fully_reachable(); + address oop_address = pc(); + + bool use_movw = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); + + // Ricklass may contain something that is not a metadata pointer so + // mov_metadata can't be used + InlinedAddress value((address)Universe::non_oop_word()); + InlinedAddress addr(op->addr()); + if (use_movw) { +#ifdef AARCH64 + ShouldNotReachHere(); +#else + __ movw(Ricklass, ((unsigned int)Universe::non_oop_word()) & 0xffff); + __ movt(Ricklass, ((unsigned int)Universe::non_oop_word()) >> 16); +#endif // AARCH64 + } else { + // No movw/movt: the value must be loaded PC-relative, but there is no + // relocation and hence no metadata table to load it from. + // So use a b instruction rather than a bl, inline the constant after the + // branch, load it with a PC-relative ldr, and arrange for + // the call to return after the constant(s). 
+ __ ldr_literal(Ricklass, value); + } + __ relocate(virtual_call_Relocation::spec(oop_address)); + if (near_range && use_movw) { + __ bl(op->addr()); + } else { + Label call_return; + __ adr(LR, call_return); + if (near_range) { + __ b(op->addr()); + } else { + __ indirect_jump(addr, Rtemp); + __ bind_literal(addr); + } + if (!use_movw) { + __ bind_literal(value); + } + __ bind(call_return); + } + add_call_info(code_offset(), op->info()); +} + + +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size); + if (stub == NULL) { + BAILOUT("static call stub overflow"); + } + + DEBUG_ONLY(int offset = code_offset();) + + InlinedMetadata metadata_literal(NULL); + __ relocate(static_stub_Relocation::spec(call_pc)); + // If not a single instruction, NativeMovConstReg::next_instruction_address() + // must jump over the whole following ldr_literal. 
+ // (See CompiledStaticCall::set_to_interpreted()) +#ifdef ASSERT + address ldr_site = __ pc(); +#endif + __ ldr_literal(Rmethod, metadata_literal); + assert(nativeMovConstReg_at(ldr_site)->next_instruction_address() == __ pc(), "Fix ldr_literal or its parsing"); + bool near_range = __ cache_fully_reachable(); + InlinedAddress dest((address)-1); + if (near_range) { + address branch_site = __ pc(); + __ b(branch_site); // b to self maps to special NativeJump -1 destination + } else { + __ indirect_jump(dest, Rtemp); + } + __ bind_literal(metadata_literal); // includes spec_for_immediate reloc + if (!near_range) { + __ bind_literal(dest); // special NativeJump -1 destination + } + + assert(code_offset() - offset <= call_stub_size, "overflow"); + __ end_a_stub(); +} + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == Rexception_obj, "must match"); + assert(exceptionPC->as_register() == Rexception_pc, "must match"); + info->add_register_oop(exceptionOop); + + Runtime1::StubID handle_id = compilation()->has_fpu_code() ? 
+ Runtime1::handle_exception_id : + Runtime1::handle_exception_nofpu_id; + Label return_address; + __ adr(Rexception_pc, return_address); + __ call(Runtime1::entry_for(handle_id), relocInfo::runtime_call_type); + __ bind(return_address); + add_call_info_here(info); // for exception handler +} + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == Rexception_obj, "must match"); + __ b(_unwind_handler_entry); +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { +#ifdef AARCH64 + if (dest->is_single_cpu()) { + Register res = dest->as_register(); + Register x = left->as_register(); + Register y = count->as_register(); + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + + switch (code) { + case lir_shl: __ lslv_w(res, x, y); break; + case lir_shr: __ asrv_w(res, x, y); break; + case lir_ushr: __ lsrv_w(res, x, y); break; + default: ShouldNotReachHere(); + } + } else if (dest->is_double_cpu()) { + Register res = dest->as_register_lo(); + Register x = left->as_register_lo(); + Register y = count->as_register(); + + switch (code) { + case lir_shl: __ lslv(res, x, y); break; + case lir_shr: __ asrv(res, x, y); break; + case lir_ushr: __ lsrv(res, x, y); break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +#else + AsmShift shift = lsl; + switch (code) { + case lir_shl: shift = lsl; break; + case lir_shr: shift = asr; break; + case lir_ushr: shift = lsr; break; + default: ShouldNotReachHere(); + } + + if (dest->is_single_cpu()) { + __ andr(Rtemp, count->as_register(), 31); + __ mov(dest->as_register(), AsmOperand(left->as_register(), shift, Rtemp)); + } else if (dest->is_double_cpu()) { + Register dest_lo = dest->as_register_lo(); + Register dest_hi = dest->as_register_hi(); + Register src_lo = left->as_register_lo(); + Register src_hi = left->as_register_hi(); + Register Rcount = 
count->as_register(); + // Resolve possible register conflicts + if (shift == lsl && dest_hi == src_lo) { + dest_hi = Rtemp; + } else if (shift != lsl && dest_lo == src_hi) { + dest_lo = Rtemp; + } else if (dest_lo == src_lo && dest_hi == src_hi) { + dest_lo = Rtemp; + } else if (dest_lo == Rcount || dest_hi == Rcount) { + Rcount = Rtemp; + } + __ andr(Rcount, count->as_register(), 63); + __ long_shift(dest_lo, dest_hi, src_lo, src_hi, shift, Rcount); + move_regs(dest_lo, dest->as_register_lo()); + move_regs(dest_hi, dest->as_register_hi()); + } else { + ShouldNotReachHere(); + } +#endif // AARCH64 +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { +#ifdef AARCH64 + if (dest->is_single_cpu()) { + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + count &= 31; + if (count != 0) { + switch (code) { + case lir_shl: __ _lsl_w(dest->as_register(), left->as_register(), count); break; + case lir_shr: __ _asr_w(dest->as_register(), left->as_register(), count); break; + case lir_ushr: __ _lsr_w(dest->as_register(), left->as_register(), count); break; + default: ShouldNotReachHere(); + } + } else { + move_regs(left->as_register(), dest->as_register()); + } + } else if (dest->is_double_cpu()) { + count &= 63; + if (count != 0) { + switch (code) { + case lir_shl: __ _lsl(dest->as_register_lo(), left->as_register_lo(), count); break; + case lir_shr: __ _asr(dest->as_register_lo(), left->as_register_lo(), count); break; + case lir_ushr: __ _lsr(dest->as_register_lo(), left->as_register_lo(), count); break; + default: ShouldNotReachHere(); + } + } else { + move_regs(left->as_register_lo(), dest->as_register_lo()); + } + } else { + ShouldNotReachHere(); + } + +#else + AsmShift shift = lsl; + switch (code) { + case lir_shl: shift = lsl; break; + case lir_shr: shift = asr; break; + case lir_ushr: shift = lsr; break; + default: ShouldNotReachHere(); + } + + if 
(dest->is_single_cpu()) { + count &= 31; + if (count != 0) { + __ mov(dest->as_register(), AsmOperand(left->as_register(), shift, count)); + } else { + move_regs(left->as_register(), dest->as_register()); + } + } else if (dest->is_double_cpu()) { + count &= 63; + if (count != 0) { + Register dest_lo = dest->as_register_lo(); + Register dest_hi = dest->as_register_hi(); + Register src_lo = left->as_register_lo(); + Register src_hi = left->as_register_hi(); + // Resolve possible register conflicts + if (shift == lsl && dest_hi == src_lo) { + dest_hi = Rtemp; + } else if (shift != lsl && dest_lo == src_hi) { + dest_lo = Rtemp; + } + __ long_shift(dest_lo, dest_hi, src_lo, src_hi, shift, count); + move_regs(dest_lo, dest->as_register_lo()); + move_regs(dest_hi, dest->as_register_hi()); + } else { + __ long_move(dest->as_register_lo(), dest->as_register_hi(), + left->as_register_lo(), left->as_register_hi()); + } + } else { + ShouldNotReachHere(); + } +#endif // AARCH64 +} + + +// Saves 4 given registers in reserved argument area. +void LIR_Assembler::save_in_reserved_area(Register r1, Register r2, Register r3, Register r4) { + verify_reserved_argument_area_size(4); +#ifdef AARCH64 + __ stp(r1, r2, Address(SP, 0)); + __ stp(r3, r4, Address(SP, 2*wordSize)); +#else + __ stmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4)); +#endif // AARCH64 +} + +// Restores 4 given registers from reserved argument area. 
+void LIR_Assembler::restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4) { +#ifdef AARCH64 + __ ldp(r1, r2, Address(SP, 0)); + __ ldp(r3, r4, Address(SP, 2*wordSize)); +#else + __ ldmia(SP, RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3) | RegisterSet(r4), no_writeback); +#endif // AARCH64 +} + + +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst = op->dst()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + Register tmp2 = Rtemp; + + assert(src == R0 && src_pos == R1 && dst == R2 && dst_pos == R3, "code assumption"); +#ifdef AARCH64 + assert(length == R4, "code assumption"); +#endif // AARCH64 + + CodeStub* stub = op->stub(); + + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // If we don't know anything about the array type, just go through the generic arraycopy + if (default_type == NULL) { + + // save arguments, because they will be killed by a runtime call + save_in_reserved_area(R0, R1, R2, R3); + +#ifdef AARCH64 + // save length argument, will be killed by a runtime call + __ raw_push(length, ZR); +#else + // pass length argument on SP[0] + __ str(length, Address(SP, -2*wordSize, pre_indexed)); // 2 words for a proper stack alignment +#endif // AARCH64 + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + if (copyfunc_addr == NULL) { // Use C version if stub was not generated + __ call(CAST_FROM_FN_PTR(address, Runtime1::arraycopy)); + } else { +#ifndef PRODUCT + if (PrintC1Statistics) { + __ inc_counter((address)&Runtime1::_generic_arraycopystub_cnt, tmp, tmp2); + } +#endif // !PRODUCT + // the stub is in the code cache so close enough + __ call(copyfunc_addr, relocInfo::runtime_call_type); + } + +#ifdef AARCH64 + __ raw_pop(length, ZR); +#else + __ add(SP, SP, 2*wordSize); +#endif // AARCH64 + + __ cbz_32(R0, *stub->continuation()); + + if (copyfunc_addr != NULL) { + __ mvn_32(tmp, R0); + restore_from_reserved_area(R0, R1, R2, R3); // load saved arguments in slow case only + __ sub_32(length, length, tmp); + __ add_32(src_pos, src_pos, tmp); + __ add_32(dst_pos, dst_pos, tmp); + } else { + restore_from_reserved_area(R0, R1, R2, R3); // load saved arguments in slow case only + } + + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + int elem_size = type2aelembytes(basic_type); + int shift = exact_log2(elem_size); + + // Check for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ cmp(src, 0); + __ 
cond_cmp(dst, 0, ne); // make one instruction shorter if both checks are needed + __ b(*stub->entry(), eq); + } else { + __ cbz(src, *stub->entry()); + } + } else if (flags & LIR_OpArrayCopy::dst_null_check) { + __ cbz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. + if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ldr_u32(tmp2, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ mov_slow(tmp, Klass::_lh_neutral_value); + __ cmp_32(tmp2, tmp); + __ b(*stub->entry(), ge); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ldr_u32(tmp2, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ mov_slow(tmp, Klass::_lh_neutral_value); + __ cmp_32(tmp2, tmp); + __ b(*stub->entry(), ge); + } + } + + // Check if negative + const int all_positive_checks = LIR_OpArrayCopy::src_pos_positive_check | + LIR_OpArrayCopy::dst_pos_positive_check | + LIR_OpArrayCopy::length_positive_check; + switch (flags & all_positive_checks) { + case LIR_OpArrayCopy::src_pos_positive_check: + __ branch_if_negative_32(src_pos, *stub->entry()); + break; + case LIR_OpArrayCopy::dst_pos_positive_check: + __ branch_if_negative_32(dst_pos, *stub->entry()); + break; + case LIR_OpArrayCopy::length_positive_check: + __ branch_if_negative_32(length, *stub->entry()); + break; + case LIR_OpArrayCopy::src_pos_positive_check | LIR_OpArrayCopy::dst_pos_positive_check: + __ branch_if_any_negative_32(src_pos, dst_pos, tmp, *stub->entry()); + break; + case LIR_OpArrayCopy::src_pos_positive_check | LIR_OpArrayCopy::length_positive_check: + __ branch_if_any_negative_32(src_pos, length, tmp, *stub->entry()); + break; + case LIR_OpArrayCopy::dst_pos_positive_check | 
LIR_OpArrayCopy::length_positive_check: + __ branch_if_any_negative_32(dst_pos, length, tmp, *stub->entry()); + break; + case all_positive_checks: + __ branch_if_any_negative_32(src_pos, dst_pos, length, tmp, *stub->entry()); + break; + default: + assert((flags & all_positive_checks) == 0, "the last option"); + } + + // Range checks + if (flags & LIR_OpArrayCopy::src_range_check) { + __ ldr_s32(tmp2, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ add_32(tmp, src_pos, length); + __ cmp_32(tmp, tmp2); + __ b(*stub->entry(), hi); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ ldr_s32(tmp2, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ add_32(tmp, dst_pos, length); + __ cmp_32(tmp, tmp2); + __ b(*stub->entry(), hi); + } + + // Check if src and dst are of the same type + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + // We don't need decode because we just need to compare + __ ldr_u32(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ ldr_u32(tmp2, Address(dst, oopDesc::klass_offset_in_bytes())); + __ cmp_32(tmp, tmp2); + } else { + __ load_klass(tmp, src); + __ load_klass(tmp2, dst); + __ cmp(tmp, tmp2); + } + __ b(*stub->entry(), ne); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + + __ load_klass(tmp, src); + __ load_klass(tmp2, dst); + + // We are at a call so all live registers are saved before we + // get here + assert_different_registers(tmp, tmp2, R6, altFP_7_11); + + __ check_klass_subtype_fast_path(tmp, tmp2, R6, altFP_7_11, &cont, copyfunc_addr == NULL ? 
stub->entry() : &slow, NULL); + + __ mov(R6, R0); + __ mov(altFP_7_11, R1); + __ mov(R0, tmp); + __ mov(R1, tmp2); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); // does not blow any registers except R0, LR and Rtemp + __ cmp_32(R0, 0); + __ mov(R0, R6); + __ mov(R1, altFP_7_11); + + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + __ b(cont, ne); + + __ bind(slow); + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Only one of them is statically known to be an object array; + // check at runtime that the other one is an object array too. + assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + + __ ldr_u32(tmp2, Address(tmp, lh_offset)); + + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ mov_slow(tmp, objArray_lh); + __ cmp_32(tmp, tmp2); + __ b(*stub->entry(), ne); + } + + save_in_reserved_area(R0, R1, R2, R3); + + Register src_ptr = R0; + Register dst_ptr = R1; + Register len = R2; + Register chk_off = R3; + Register super_k = AARCH64_ONLY(R4) NOT_AARCH64(tmp); + + __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift); + + __ add(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ add_ptr_scaled_int32(dst_ptr, dst_ptr, dst_pos, shift); + __ load_klass(tmp, dst); + + int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + +#ifdef AARCH64 + __ raw_push(length, ZR); // Preserve length around *copyfunc_addr call + + __ mov(len, length); + __ ldr(super_k, Address(tmp, ek_offset)); // super_k == R4 == length, so this load 
cannot be performed earlier + // TODO-AARCH64: check whether it is faster to load super klass early by using tmp and additional mov. + __ ldr_u32(chk_off, Address(super_k, sco_offset)); +#else // AARCH64 + __ ldr(super_k, Address(tmp, ek_offset)); + + __ mov(len, length); + __ ldr_u32(chk_off, Address(super_k, sco_offset)); + __ push(super_k); +#endif // AARCH64 + + __ call(copyfunc_addr, relocInfo::runtime_call_type); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ cbnz_32(R0, failed); + __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, tmp, tmp2); + __ bind(failed); + } +#endif // !PRODUCT + +#ifdef AARCH64 + __ raw_pop(length, ZR); +#else + __ add(SP, SP, wordSize); // Drop super_k argument +#endif // AARCH64 + + __ cbz_32(R0, *stub->continuation()); + __ mvn_32(tmp, R0); + + // load saved arguments in slow case only + restore_from_reserved_area(R0, R1, R2, R3); + + __ sub_32(length, length, tmp); + __ add_32(src_pos, src_pos, tmp); + __ add_32(dst_pos, dst_pos, tmp); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, tmp, tmp2); + } +#endif // !PRODUCT + + __ b(*stub->entry()); + + __ bind(cont); + } else { + __ b(*stub->entry(), eq); + __ bind(cont); + } + } + } + +#ifndef PRODUCT + if (PrintC1Statistics) { + address counter = Runtime1::arraycopy_count_address(basic_type); + __ inc_counter(counter, tmp, tmp2); + } +#endif // !PRODUCT + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + Register src_ptr = R0; + Register dst_ptr = R1; + Register len = R2; + + __ add(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ add_ptr_scaled_int32(src_ptr, src_ptr, src_pos, shift); + + __ add(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type)); + __ 
add_ptr_scaled_int32(dst_ptr, dst_ptr, dst_pos, shift); + + __ mov(len, length); + + __ call(entry, relocInfo::runtime_call_type); + + __ bind(*stub->continuation()); +} + +#ifdef ASSERT + // emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + +#ifdef AARCH64 + __ NOT_IMPLEMENTED(); +#else + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + + Label ok; + if (op->condition() != lir_cond_always) { + AsmCondition acond; + switch (op->condition()) { + case lir_cond_equal: acond = eq; break; + case lir_cond_notEqual: acond = ne; break; + case lir_cond_less: acond = lt; break; + case lir_cond_lessEqual: acond = le; break; + case lir_cond_greaterEqual: acond = ge; break; + case lir_cond_greater: acond = gt; break; + case lir_cond_aboveEqual: acond = hs; break; + case lir_cond_belowEqual: acond = ls; break; + default: ShouldNotReachHere(); + } + __ b(ok, acond); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +#endif // AARCH64 +} +#endif // ASSERT + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + fatal("CRC32 intrinsic is not implemented on this platform"); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_pointer_register(); + Register hdr = op->hdr_opr()->as_pointer_register(); + Register lock = op->lock_opr()->as_pointer_register(); + Register tmp = op->scratch_opr()->is_illegal() ? 
noreg : + op->scratch_opr()->as_pointer_register(); + + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + int null_check_offset = __ lock_object(hdr, obj, lock, tmp, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + } else if (op->code() == lir_unlock) { + __ unlock_object(hdr, obj, lock, tmp, *op->stub()->entry()); + } else { + ShouldNotReachHere(); + } + __ bind(*op->stub()->continuation()); +} + + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + assert(op->tmp1()->is_register(), "tmp1 must be allocated"); + Register tmp1 = op->tmp1()->as_pointer_register(); + assert_different_registers(mdo, tmp1); + __ mov_metadata(mdo, md->constant_encoding()); + int mdo_offset_bias = 0; + int max_offset = AARCH64_ONLY(4096 << LogBytesPerWord) NOT_AARCH64(4096); + if (md->byte_offset_of_slot(data, CounterData::count_offset()) + data->size_in_bytes() >= max_offset) { + // The offset is large so bias the mdo by the base of the slot so + // that the ldr can use an immediate offset to reference the slots of the data + mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset()); + __ mov_slow(tmp1, mdo_offset_bias); + __ add(mdo, mdo, tmp1); + } + + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias); + Bytecodes::Code bc = 
method->java_code_at_bci(bci); + const bool callee_is_static = callee->is_loaded() && callee->is_static(); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && + !callee_is_static && // required for optimized MH invokes + C1ProfileVirtualCalls) { + + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, tmp1, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, + VirtualCallData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ldr(tmp1, data_addr); + __ add(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, data_addr); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - + mdo_offset_bias); + __ mov_metadata(tmp1, 
known_klass->constant_encoding()); + __ str(tmp1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - + mdo_offset_bias); + __ ldr(tmp1, data_addr); + __ add(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, data_addr); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ ldr(tmp1, counter_addr); + __ add(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, counter_addr); + + __ bind(update_done); + } + } else { + // Static call + __ ldr(tmp1, counter_addr); + __ add(tmp1, tmp1, DataLayout::counter_increment); + __ str(tmp1, counter_addr); + } +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + fatal("Type profiling not implemented on this platform"); +} + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no); + __ add_slow(dst->as_pointer_register(), mon_addr.base(), mon_addr.disp()); +} + + +void LIR_Assembler::align_backward_branch_target() { + // TODO-AARCH64 review it + // Some ARM processors do better with 8-byte branch target alignment + __ align(8); +} + + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + + if (left->is_single_cpu()) { + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + __ neg_32(dest->as_register(), left->as_register()); + } else if (left->is_double_cpu()) { +#ifdef AARCH64 + __ neg(dest->as_register_lo(), left->as_register_lo()); +#else + Register dest_lo = dest->as_register_lo(); + Register dest_hi = dest->as_register_hi(); + Register src_lo = 
left->as_register_lo(); + Register src_hi = left->as_register_hi(); + if (dest_lo == src_hi) { + dest_lo = Rtemp; + } + __ rsbs(dest_lo, src_lo, 0); + __ rsc(dest_hi, src_hi, 0); + move_regs(dest_lo, dest->as_register_lo()); +#endif // AARCH64 + } else if (left->is_single_fpu()) { + __ neg_float(dest->as_float_reg(), left->as_float_reg()); + } else if (left->is_double_fpu()) { + __ neg_double(dest->as_double_reg(), left->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) { + LIR_Address* addr = addr_opr->as_address_ptr(); + if (addr->index()->is_illegal()) { + jint c = addr->disp(); + if (!Assembler::is_arith_imm_in_range(c)) { + BAILOUT("illegal arithmetic operand"); + } + __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(), c); + } else { + assert(addr->disp() == 0, "cannot handle otherwise"); +#ifdef AARCH64 + assert(addr->index()->is_double_cpu(), "should be"); +#endif // AARCH64 + __ add(dest->as_pointer_register(), addr->base()->as_pointer_register(), + AsmOperand(addr->index()->as_pointer_register(), lsl, addr->scale())); + } +} + + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + __ call(dest); + if (info != NULL) { + add_call_info_here(info); + } +} + + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { +#ifdef AARCH64 + Unimplemented(); // TODO-AARCH64: Use stlr/ldar instructions for volatile load/store +#else + assert(src->is_double_cpu() && dest->is_address() || + src->is_address() && dest->is_double_cpu(), + "Simple move_op is called for all other cases"); + + int null_check_offset; + if (dest->is_address()) { + // Store + const LIR_Address* addr = dest->as_address_ptr(); + const Register src_lo = src->as_register_lo(); + const Register src_hi = src->as_register_hi(); + 
assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already"); + + if (src_lo < src_hi) { + null_check_offset = __ offset(); + __ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(src_hi)); + } else { + assert(src_lo < Rtemp, "Rtemp is higher than any allocatable register"); + __ mov(Rtemp, src_hi); + null_check_offset = __ offset(); + __ stmia(addr->base()->as_register(), RegisterSet(src_lo) | RegisterSet(Rtemp)); + } + } else { + // Load + const LIR_Address* addr = src->as_address_ptr(); + const Register dest_lo = dest->as_register_lo(); + const Register dest_hi = dest->as_register_hi(); + assert(addr->index()->is_illegal() && addr->disp() == 0, "The address is simple already"); + + null_check_offset = __ offset(); + if (dest_lo < dest_hi) { + __ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(dest_hi)); + } else { + assert(dest_lo < Rtemp, "Rtemp is higher than any allocatable register"); + __ ldmia(addr->base()->as_register(), RegisterSet(dest_lo) | RegisterSet(Rtemp)); + __ mov(dest_hi, Rtemp); + } + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_offset, info); + } +#endif // AARCH64 +} + + +void LIR_Assembler::membar() { + __ membar(MacroAssembler::StoreLoad, Rtemp); +} + +void LIR_Assembler::membar_acquire() { + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore), Rtemp); +} + +void LIR_Assembler::membar_release() { + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); +} + +void LIR_Assembler::membar_loadload() { + __ membar(MacroAssembler::LoadLoad, Rtemp); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore, Rtemp); +} + +void LIR_Assembler::membar_loadstore() { + __ membar(MacroAssembler::LoadStore, Rtemp); +} + +void LIR_Assembler::membar_storeload() { + __ membar(MacroAssembler::StoreLoad, Rtemp); +} + +void 
LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + // Not used on ARM + Unimplemented(); +} + +void LIR_Assembler::peephole(LIR_List* lir) { +#ifdef AARCH64 + return; // TODO-AARCH64 implement peephole optimizations +#endif + LIR_OpList* inst = lir->instructions_list(); + const int inst_length = inst->length(); + for (int i = 0; i < inst_length; i++) { + LIR_Op* op = inst->at(i); + switch (op->code()) { + case lir_cmp: { + // Replace: + // cmp rX, y + // cmove [EQ] y, z, rX + // with + // cmp rX, y + // cmove [EQ] illegalOpr, z, rX + // + // or + // cmp rX, y + // cmove [NE] z, y, rX + // with + // cmp rX, y + // cmove [NE] z, illegalOpr, rX + // + // moves from illegalOpr should be removed when converting LIR to native assembly + + LIR_Op2* cmp = op->as_Op2(); + assert(cmp != NULL, "cmp LIR instruction is not an op2"); + + if (i + 1 < inst_length) { + LIR_Op2* cmove = inst->at(i + 1)->as_Op2(); + if (cmove != NULL && cmove->code() == lir_cmove) { + LIR_Opr cmove_res = cmove->result_opr(); + bool res_is_op1 = cmove_res == cmp->in_opr1(); + bool res_is_op2 = cmove_res == cmp->in_opr2(); + LIR_Opr cmp_res, cmp_arg; + if (res_is_op1) { + cmp_res = cmp->in_opr1(); + cmp_arg = cmp->in_opr2(); + } else if (res_is_op2) { + cmp_res = cmp->in_opr2(); + cmp_arg = cmp->in_opr1(); + } else { + cmp_res = LIR_OprFact::illegalOpr; + cmp_arg = LIR_OprFact::illegalOpr; + } + + if (cmp_res != LIR_OprFact::illegalOpr) { + LIR_Condition cond = cmove->condition(); + if (cond == lir_cond_equal && cmove->in_opr1() == cmp_arg) { + cmove->set_in_opr1(LIR_OprFact::illegalOpr); + } else if (cond == lir_cond_notEqual && cmove->in_opr2() == cmp_arg) { + cmove->set_in_opr2(LIR_OprFact::illegalOpr); + } + } + } + } + break; + } + + default: + break; + } + } +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { + Register ptr = src->as_pointer_register(); + + if (code == lir_xchg) { 
+#ifdef AARCH64 + if (UseCompressedOops && data->is_oop()) { + __ encode_heap_oop(tmp->as_pointer_register(), data->as_register()); + } +#endif // AARCH64 + } else { + assert (!data->is_oop(), "xadd for oops"); + } + +#ifndef AARCH64 + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore), Rtemp); +#endif // !AARCH64 + + Label retry; + __ bind(retry); + + if ((data->type() == T_INT) || (data->is_oop() AARCH64_ONLY(&& UseCompressedOops))) { + Register dst = dest->as_register(); + Register new_val = noreg; +#ifdef AARCH64 + __ ldaxr_w(dst, ptr); +#else + __ ldrex(dst, Address(ptr)); +#endif + if (code == lir_xadd) { + Register tmp_reg = tmp->as_register(); + if (data->is_constant()) { + assert_different_registers(dst, ptr, tmp_reg); + __ add_32(tmp_reg, dst, data->as_constant_ptr()->as_jint()); + } else { + assert_different_registers(dst, ptr, tmp_reg, data->as_register()); + __ add_32(tmp_reg, dst, data->as_register()); + } + new_val = tmp_reg; + } else { + if (UseCompressedOops && data->is_oop()) { + new_val = tmp->as_pointer_register(); + } else { + new_val = data->as_register(); + } + assert_different_registers(dst, ptr, new_val); + } +#ifdef AARCH64 + __ stlxr_w(Rtemp, new_val, ptr); +#else + __ strex(Rtemp, new_val, Address(ptr)); +#endif // AARCH64 + +#ifdef AARCH64 + } else if ((data->type() == T_LONG) || (data->is_oop() && !UseCompressedOops)) { + Register dst = dest->as_pointer_register(); + Register new_val = noreg; + __ ldaxr(dst, ptr); + if (code == lir_xadd) { + Register tmp_reg = tmp->as_pointer_register(); + if (data->is_constant()) { + assert_different_registers(dst, ptr, tmp_reg); + jlong c = data->as_constant_ptr()->as_jlong(); + assert((jlong)((jint)c) == c, "overflow"); + __ add(tmp_reg, dst, (jint)c); + } else { + assert_different_registers(dst, ptr, tmp_reg, data->as_pointer_register()); + __ add(tmp_reg, dst, data->as_pointer_register()); + } + new_val = tmp_reg; + } else { + new_val = 
data->as_pointer_register(); + assert_different_registers(dst, ptr, new_val); + } + __ stlxr(Rtemp, new_val, ptr); +#else + } else if (data->type() == T_LONG) { + Register dst_lo = dest->as_register_lo(); + Register new_val_lo = noreg; + Register dst_hi = dest->as_register_hi(); + + assert(dst_hi->encoding() == dst_lo->encoding() + 1, "non aligned register pair"); + assert((dst_lo->encoding() & 0x1) == 0, "misaligned register pair"); + + __ bind(retry); + __ ldrexd(dst_lo, Address(ptr)); + if (code == lir_xadd) { + Register tmp_lo = tmp->as_register_lo(); + Register tmp_hi = tmp->as_register_hi(); + + assert(tmp_hi->encoding() == tmp_lo->encoding() + 1, "non aligned register pair"); + assert((tmp_lo->encoding() & 0x1) == 0, "misaligned register pair"); + + if (data->is_constant()) { + jlong c = data->as_constant_ptr()->as_jlong(); + assert((jlong)((jint)c) == c, "overflow"); + assert_different_registers(dst_lo, dst_hi, ptr, tmp_lo, tmp_hi); + __ adds(tmp_lo, dst_lo, (jint)c); + __ adc(tmp_hi, dst_hi, 0); + } else { + Register new_val_lo = data->as_register_lo(); + Register new_val_hi = data->as_register_hi(); + __ adds(tmp_lo, dst_lo, new_val_lo); + __ adc(tmp_hi, dst_hi, new_val_hi); + assert_different_registers(dst_lo, dst_hi, ptr, tmp_lo, tmp_hi, new_val_lo, new_val_hi); + } + new_val_lo = tmp_lo; + } else { + new_val_lo = data->as_register_lo(); + Register new_val_hi = data->as_register_hi(); + + assert_different_registers(dst_lo, dst_hi, ptr, new_val_lo, new_val_hi); + assert(new_val_hi->encoding() == new_val_lo->encoding() + 1, "non aligned register pair"); + assert((new_val_lo->encoding() & 0x1) == 0, "misaligned register pair"); + } + __ strexd(Rtemp, new_val_lo, Address(ptr)); +#endif // AARCH64 + } else { + ShouldNotReachHere(); + } + + __ cbnz_32(Rtemp, retry); + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp); + +#ifdef AARCH64 + if (UseCompressedOops && data->is_oop()) { + __ 
decode_heap_oop(dest->as_register()); + } +#endif // AARCH64 +} + +int LIR_Assembler::exception_handler_size = -1; + +#undef __ --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LIRAssembler_arm.hpp 2016-12-02 11:18:29.518175368 -0500 @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP +#define CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP + + private: + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, int mdo_offset_bias, + ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done); + // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot. 
+ void setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias); + + void typecheck_profile_helper1(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias, + Register obj, Register mdo, Register data_val, Label* obj_is_null); + + void typecheck_profile_helper2(ciMethodData* md, ciProfileData* data, int mdo_offset_bias, + Register mdo, Register recv, Register value, Register tmp1, + Label* profile_cast_success, Label* profile_cast_failure, + Label* success, Label* failure); + +#ifdef AARCH64 + void long_compare_helper(LIR_Opr opr1, LIR_Opr opr2); +#endif // AARCH64 + + // Saves 4 given registers in reserved argument area. + void save_in_reserved_area(Register r1, Register r2, Register r3, Register r4); + + // Restores 4 given registers from reserved argument area. + void restore_from_reserved_area(Register r1, Register r2, Register r3, Register r4); + + public: + + enum { + call_stub_size = AARCH64_ONLY(32) NOT_AARCH64(16), + deopt_handler_size = AARCH64_ONLY(32) NOT_AARCH64(16) + }; + + static int exception_handler_size; + + void verify_reserved_argument_area_size(int args_count) PRODUCT_RETURN; + + void store_parameter(jint c, int offset_from_sp_in_words); + void store_parameter(Metadata* m, int offset_from_sp_in_words); + +#endif // CPU_ARM_VM_C1_LIRASSEMBLER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LIRGenerator_arm.cpp 2016-12-02 11:18:36.538573496 -0500 @@ -0,0 +1,1767 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_arm.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +void LIRItem::load_byte_item() { + load_item(); +} + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (_gen->can_inline_as_constant(value())) { + if (!r->is_constant()) { + r = LIR_OprFact::value_type(value()->type()); + } + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { + return FrameMap::Exception_oop_opr; +} + +LIR_Opr LIRGenerator::exceptionPcOpr() { + return FrameMap::Exception_pc_opr; +} + +LIR_Opr 
LIRGenerator::syncLockOpr() { + return new_register(T_INT); +} + +LIR_Opr LIRGenerator::syncTempOpr() { + return new_register(T_OBJECT); +} + +LIR_Opr LIRGenerator::getThreadTemp() { + return LIR_OprFact::illegalOpr; +} + +LIR_Opr LIRGenerator::atomicLockOpr() { + return LIR_OprFact::illegalOpr; +} + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::Int_result_opr; break; + case objectTag: opr = FrameMap::Object_result_opr; break; + case longTag: opr = FrameMap::Long_result_opr; break; + case floatTag: opr = FrameMap::Float_result_opr; break; + case doubleTag: opr = FrameMap::Double_result_opr; break; + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + return new_register(T_INT); +} + + +//--------- loading items into registers -------------------------------- + + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { +#ifdef AARCH64 + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else if (v->type()->as_FloatConstant() != NULL) { + return jint_cast(v->type()->as_FloatConstant()->value()) == 0; + } else if (v->type()->as_DoubleConstant() != NULL) { + return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0; + } +#endif // AARCH64 + return false; +} + + +bool LIRGenerator::can_inline_as_constant(Value v) const { + if (v->type()->as_IntConstant() != NULL) { + return Assembler::is_arith_imm_in_range(v->type()->as_IntConstant()->value()); + } else if (v->type()->as_ObjectConstant() != 
NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); +#ifdef AARCH64 + } else if (v->type()->as_LongConstant() != NULL) { + return Assembler::is_arith_imm_in_range(v->type()->as_LongConstant()->value()); +#else + } else if (v->type()->as_FloatConstant() != NULL) { + return v->type()->as_FloatConstant()->value() == 0.0f; + } else if (v->type()->as_DoubleConstant() != NULL) { + return v->type()->as_DoubleConstant()->value() == 0.0; +#endif // AARCH64 + } + return false; +} + + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + ShouldNotCallThis(); // Not used on ARM + return false; +} + + +#ifdef AARCH64 + +static bool can_inline_as_constant_in_cmp(Value v) { + jlong constant; + if (v->type()->as_IntConstant() != NULL) { + constant = v->type()->as_IntConstant()->value(); + } else if (v->type()->as_LongConstant() != NULL) { + constant = v->type()->as_LongConstant()->value(); + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else if (v->type()->as_FloatConstant() != NULL) { + return v->type()->as_FloatConstant()->value() == 0.0f; + } else if (v->type()->as_DoubleConstant() != NULL) { + return v->type()->as_DoubleConstant()->value() == 0.0; + } else { + return false; + } + + return Assembler::is_arith_imm_in_range(constant) || Assembler::is_arith_imm_in_range(-constant); +} + + +static bool can_inline_as_constant_in_logic(Value v) { + if (v->type()->as_IntConstant() != NULL) { + return Assembler::LogicalImmediate(v->type()->as_IntConstant()->value(), true).is_encoded(); + } else if (v->type()->as_LongConstant() != NULL) { + return Assembler::LogicalImmediate(v->type()->as_LongConstant()->value(), false).is_encoded(); + } + return false; +} + + +#endif // AARCH64 + + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + + +static LIR_Opr make_constant(BasicType type, jlong c) { + switch (type) { + case T_ADDRESS: + case 
T_OBJECT: return LIR_OprFact::intptrConst(c); + case T_LONG: return LIR_OprFact::longConst(c); + case T_INT: return LIR_OprFact::intConst(c); + default: ShouldNotReachHere(); + return LIR_OprFact::intConst(-1); + } +} + +#ifdef AARCH64 + +void LIRGenerator::add_constant(LIR_Opr src, jlong c, LIR_Opr dest) { + if (c == 0) { + __ move(src, dest); + return; + } + + BasicType type = src->type(); + bool is_neg = (c < 0); + c = ABS(c); + + if ((c >> 24) == 0) { + for (int shift = 0; shift <= 12; shift += 12) { + int part = ((int)c) & (right_n_bits(12) << shift); + if (part != 0) { + if (is_neg) { + __ sub(src, make_constant(type, part), dest); + } else { + __ add(src, make_constant(type, part), dest); + } + src = dest; + } + } + } else { + __ move(make_constant(type, c), dest); + if (is_neg) { + __ sub(src, dest, dest); + } else { + __ add(src, dest, dest); + } + } +} + +#endif // AARCH64 + + +void LIRGenerator::add_large_constant(LIR_Opr src, int c, LIR_Opr dest) { + assert(c != 0, "must be"); +#ifdef AARCH64 + add_constant(src, c, dest); +#else + // Find first non-zero bit + int shift = 0; + while ((c & (3 << shift)) == 0) { + shift += 2; + } + // Add the least significant part of the constant + int mask = 0xff << shift; + __ add(src, LIR_OprFact::intConst(c & mask), dest); + // Add up to 3 other parts of the constant; + // each of them can be represented as rotated_imm + if (c & (mask << 8)) { + __ add(dest, LIR_OprFact::intConst(c & (mask << 8)), dest); + } + if (c & (mask << 16)) { + __ add(dest, LIR_OprFact::intConst(c & (mask << 16)), dest); + } + if (c & (mask << 24)) { + __ add(dest, LIR_OprFact::intConst(c & (mask << 24)), dest); + } +#endif // AARCH64 +} + +static LIR_Address* make_address(LIR_Opr base, LIR_Opr index, LIR_Address::Scale scale, BasicType type) { + return new LIR_Address(base, index, scale, 0, type); +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + 
assert(base->is_register(), "must be"); + + if (index->is_constant()) { + disp += index->as_constant_ptr()->as_jint() << shift; + index = LIR_OprFact::illegalOpr; + } + +#ifndef AARCH64 + if (base->type() == T_LONG) { + LIR_Opr tmp = new_register(T_INT); + __ convert(Bytecodes::_l2i, base, tmp); + base = tmp; + } + if (index != LIR_OprFact::illegalOpr && index->type() == T_LONG) { + LIR_Opr tmp = new_register(T_INT); + __ convert(Bytecodes::_l2i, index, tmp); + index = tmp; + } + // At this point base and index should be all ints and not constants + assert(base->is_single_cpu() && !base->is_constant(), "base should be an non-constant int"); + assert(index->is_illegal() || (index->type() == T_INT && !index->is_constant()), "index should be an non-constant int"); +#endif + + int max_disp; + bool disp_is_in_range; + bool embedded_shift; + +#ifdef AARCH64 + int align = exact_log2(type2aelembytes(type, true)); + assert((disp & right_n_bits(align)) == 0, "displacement is not aligned"); + assert(shift == 0 || shift == align, "shift should be zero or equal to embedded align"); + max_disp = (1 << 12) << align; + + if (disp >= 0) { + disp_is_in_range = Assembler::is_unsigned_imm_in_range(disp, 12, align); + } else { + disp_is_in_range = Assembler::is_imm_in_range(disp, 9, 0); + } + + embedded_shift = true; +#else + switch (type) { + case T_BYTE: + case T_SHORT: + case T_CHAR: + max_disp = 256; // ldrh, ldrsb encoding has 8-bit offset + embedded_shift = false; + break; + case T_FLOAT: + case T_DOUBLE: + max_disp = 1024; // flds, fldd have 8-bit offset multiplied by 4 + embedded_shift = false; + break; + case T_LONG: + max_disp = 4096; + embedded_shift = false; + break; + default: + max_disp = 4096; // ldr, ldrb allow 12-bit offset + embedded_shift = true; + } + + disp_is_in_range = (-max_disp < disp && disp < max_disp); +#endif // !AARCH64 + + if (index->is_register()) { + LIR_Opr tmp = new_pointer_register(); + if (!disp_is_in_range) { + add_large_constant(base, disp, tmp); 
+ base = tmp; + disp = 0; + } + LIR_Address* addr = make_address(base, index, (LIR_Address::Scale)shift, type); + if (disp == 0 && embedded_shift) { + // can use ldr/str instruction with register index + return addr; + } else { + LIR_Opr tmp = new_pointer_register(); + __ add(base, LIR_OprFact::address(addr), tmp); // add with shifted/extended register + return new LIR_Address(tmp, disp, type); + } + } + + // If the displacement is too large to be inlined into LDR instruction, + // generate large constant with additional sequence of ADD instructions + int excess_disp = disp & ~(max_disp - 1); + if (excess_disp != 0) { + LIR_Opr tmp = new_pointer_register(); + add_large_constant(base, excess_disp, tmp); + base = tmp; + } + return new LIR_Address(base, disp & (max_disp - 1), type); +} + + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, + BasicType type, bool needs_card_mark) { + int base_offset = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + + if (index_opr->is_constant()) { + int offset = base_offset + index_opr->as_constant_ptr()->as_jint() * elem_size; + if (needs_card_mark) { + LIR_Opr base_opr = new_pointer_register(); + add_large_constant(array_opr, offset, base_opr); + return new LIR_Address(base_opr, (intx)0, type); + } else { + return generate_address(array_opr, offset, type); + } + } else { + assert(index_opr->is_register(), "must be"); + int scale = exact_log2(elem_size); + if (needs_card_mark) { + LIR_Opr base_opr = new_pointer_register(); + LIR_Address* addr = make_address(base_opr, index_opr, (LIR_Address::Scale)scale, type); + __ add(array_opr, LIR_OprFact::intptrConst(base_offset), base_opr); + __ add(base_opr, LIR_OprFact::address(addr), base_opr); // add with shifted/extended register + return new LIR_Address(base_opr, type); + } else { + return generate_address(array_opr, index_opr, scale, base_offset, type); + } + } +} + + +LIR_Opr LIRGenerator::load_immediate(int x, 
BasicType type) { + assert(type == T_LONG || type == T_INT, "should be"); + LIR_Opr r = make_constant(type, x); +#ifdef AARCH64 + bool imm_in_range = Assembler::LogicalImmediate(x, type == T_INT).is_encoded(); +#else + bool imm_in_range = AsmOperand::is_rotated_imm(x); +#endif // AARCH64 + if (!imm_in_range) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + return r; +} + + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr temp = new_register(addr->type()); + __ move(addr, temp); + __ add(temp, make_constant(addr->type(), step), temp); + __ move(temp, addr); +} + + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); + __ cmp(condition, FrameMap::LR_opr, c); +} + + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); + __ cmp(condition, reg, FrameMap::LR_opr); +} + + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); + if (is_power_of_2(c + 1)) { +#ifdef AARCH64 + __ shift_left(left, log2_intptr(c + 1), result); + __ sub(result, left, result); +#else + LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c + 1); + LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT); + __ sub(LIR_OprFact::address(addr), left, result); // rsb with shifted register +#endif // AARCH64 + return true; + } else if (is_power_of_2(c - 1)) { + LIR_Address::Scale scale = 
(LIR_Address::Scale) log2_intptr(c - 1); + LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT); + __ add(left, LIR_OprFact::address(addr), result); // add with shifted register + return true; + } + return false; +} + + +void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp) { + assert(item->type() == T_INT, "other types are not expected"); + __ store(item, new LIR_Address(FrameMap::SP_opr, in_bytes(offset_from_sp), item->type())); +} + +void LIRGenerator::set_card(LIR_Opr value, LIR_Address* card_addr) { + assert(CardTableModRefBS::dirty_card_val() == 0, + "Cannot use ZR register (aarch64) or the register containing the card table base address directly (aarch32) otherwise"); +#ifdef AARCH64 + // AARCH64 has a register that is constant zero. We can use that one to set the + // value in the card table to dirty. + __ move(FrameMap::ZR_opr, card_addr); +#else // AARCH64 + CardTableModRefBS* ct = (CardTableModRefBS*)_bs; + if(((intx)ct->byte_map_base & 0xff) == 0) { + // If the card table base address is aligned to 256 bytes, we can use the register + // that contains the card_table_base_address. + __ move(value, card_addr); + } else { + // Otherwise we need to create a register containing that value. 
+ LIR_Opr tmp_zero = new_register(T_INT); + __ move(LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val()), tmp_zero); + __ move(tmp_zero, card_addr); + } +#endif // AARCH64 +} + +void LIRGenerator::CardTableModRef_post_barrier_helper(LIR_OprDesc* addr, LIR_Const* card_table_base) { + assert(addr->is_register(), "must be a register at this point"); + + LIR_Opr tmp = FrameMap::LR_ptr_opr; + + // TODO-AARCH64: check performance + bool load_card_table_base_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); + if (load_card_table_base_const) { + __ move((LIR_Opr)card_table_base, tmp); + } else { + __ move(new LIR_Address(FrameMap::Rthread_opr, in_bytes(JavaThread::card_table_base_offset()), T_ADDRESS), tmp); + } + +#ifdef AARCH64 + LIR_Address* shifted_reg_operand = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTableModRefBS::card_shift, 0, T_BYTE); + LIR_Opr tmp2 = tmp; + __ add(tmp, LIR_OprFact::address(shifted_reg_operand), tmp2); // tmp2 = tmp + (addr >> CardTableModRefBS::card_shift) + LIR_Address* card_addr = new LIR_Address(tmp2, T_BYTE); +#else + // Use unsigned type T_BOOLEAN here rather than (signed) T_BYTE since signed load + // byte instruction does not support the addressing mode we need. 
+ LIR_Address* card_addr = new LIR_Address(tmp, addr, (LIR_Address::Scale) -CardTableModRefBS::card_shift, 0, T_BOOLEAN); +#endif + if (UseCondCardMark) { + if (UseConcMarkSweepGC) { + __ membar_storeload(); + } + LIR_Opr cur_value = new_register(T_INT); + __ move(card_addr, cur_value); + + LabelObj* L_already_dirty = new LabelObj(); + __ cmp(lir_cond_equal, cur_value, LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val())); + __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); + set_card(tmp, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { + if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { + __ membar_storestore(); + } + set_card(tmp, card_addr); + } +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + } + if (needs_store_check || x->check_boolean()) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // the CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different + CodeEmitInfo* range_check_info = state_for(x); + CodeEmitInfo* 
null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // emit array address setup early so it schedules better + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp(lir_cond_belowEqual, length.result(), index.result()); + __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = FrameMap::R0_oop_opr; + LIR_Opr tmp2 = FrameMap::R1_oop_opr; + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, + LIR_OprFact::illegalOpr, store_check_info, + x->profiled_method(), x->profiled_bci()); + } + +#if INCLUDE_ALL_GCS + if (obj_store) { + // Needs GC write barriers. 
+ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } +#endif // INCLUDE_ALL_GCS + + LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); + __ move(result, array_addr, null_check_info); + if (obj_store) { + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } +} + + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + set_no_result(x); + + LIR_Opr lock = new_pointer_register(); + LIR_Opr hdr = new_pointer_register(); + + // Need a scratch register for biased locking on arm + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if(UseBiasedLocking) { + scratch = new_pointer_register(); + } else { + scratch = atomicLockOpr(); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, hdr, scratch, + x->monitor_no(), info_for_exception, info); +} + + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + set_no_result(x); + + LIR_Opr obj_temp = new_pointer_register(); + LIR_Opr lock = new_pointer_register(); + LIR_Opr hdr = new_pointer_register(); + + monitor_exit(obj_temp, lock, hdr, atomicLockOpr(), x->monitor_no()); +} + + +// _ineg, _lneg, _fneg, _dneg +void LIRGenerator::do_NegateOp(NegateOp* x) { +#ifdef __SOFTFP__ + address runtime_func = NULL; + ValueTag tag = x->type()->tag(); + if (tag == floatTag) { + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::fneg); + } else if (tag == doubleTag) { + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dneg); + } + if (runtime_func != NULL) { + set_result(x, call_runtime(x->x(), runtime_func, x->type(), NULL)); + return; + } +#endif // __SOFTFP__ + LIRItem 
value(x->x(), this); + value.load_item(); + LIR_Opr reg = rlock_result(x); + __ negate(value.result(), reg); +} + + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + address runtime_func; + switch (x->op()) { + case Bytecodes::_frem: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + break; + case Bytecodes::_drem: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + break; +#ifdef __SOFTFP__ + // Call function compiled with -msoft-float. + + // __aeabi_XXXX_glibc: Imported code from glibc soft-fp bundle for calculation accuracy improvement. See CR 6757269. + + case Bytecodes::_fadd: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fadd_glibc); + break; + case Bytecodes::_fmul: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fmul); + break; + case Bytecodes::_fsub: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fsub_glibc); + break; + case Bytecodes::_fdiv: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_fdiv); + break; + case Bytecodes::_dadd: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dadd_glibc); + break; + case Bytecodes::_dmul: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dmul); + break; + case Bytecodes::_dsub: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_dsub_glibc); + break; + case Bytecodes::_ddiv: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_ddiv); + break; + default: + ShouldNotReachHere(); +#else // __SOFTFP__ + default: { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + rlock_result(x); + arithmetic_op_fpu(x->op(), x->operand(), left.result(), right.result(), x->is_strictfp()); + return; + } +#endif // __SOFTFP__ + } + + LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, x->type(), NULL); + set_result(x, result); +} + + +void LIRGenerator::make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info) { + 
assert(right_arg->is_register(), "must be"); + __ cmp(lir_cond_equal, right_arg, make_constant(type, 0)); + __ branch(lir_cond_equal, type, new DivByZeroStub(info)); +} + + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + CodeEmitInfo* info = NULL; + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + info = state_for(x); + } + +#ifdef AARCH64 + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Test if instr is commutative and if we should swap + if (x->is_commutative() && left.is_constant()) { + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + switch (x->op()) { + case Bytecodes::_ldiv: + right_arg->load_item(); + make_div_by_zero_check(right_arg->result(), T_LONG, info); + __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL); + break; + + case Bytecodes::_lrem: { + right_arg->load_item(); + make_div_by_zero_check(right_arg->result(), T_LONG, info); + // a % b is implemented with 2 instructions: + // tmp = a/b (sdiv) + // res = a - b*tmp (msub) + LIR_Opr tmp = FrameMap::as_long_opr(Rtemp); + __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL); + break; + } + + case Bytecodes::_lmul: + if (right_arg->is_constant() && is_power_of_2_long(right_arg->get_jlong_constant())) { + right_arg->dont_load_item(); + __ shift_left(left_arg->result(), exact_log2_long(right_arg->get_jlong_constant()), rlock_result(x)); + } else { + right_arg->load_item(); + __ mul(left_arg->result(), right_arg->result(), rlock_result(x)); + } + break; + + case Bytecodes::_ladd: + case Bytecodes::_lsub: + if (right_arg->is_constant()) { + jlong c = right_arg->get_jlong_constant(); + add_constant(left_arg->result(), (x->op() == Bytecodes::_ladd) ? 
c : -c, rlock_result(x)); + } else { + right_arg->load_item(); + arithmetic_op_long(x->op(), rlock_result(x), left_arg->result(), right_arg->result(), NULL); + } + break; + + default: + ShouldNotReachHere(); + } +#else + switch (x->op()) { + case Bytecodes::_ldiv: + case Bytecodes::_lrem: { + LIRItem right(x->y(), this); + right.load_item(); + make_div_by_zero_check(right.result(), T_LONG, info); + } + // Fall through + case Bytecodes::_lmul: { + address entry; + switch (x->op()) { + case Bytecodes::_lrem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + break; + case Bytecodes::_ldiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + break; + case Bytecodes::_lmul: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lmul); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->y(), x->x(), entry, x->type(), NULL); + set_result(x, result); + break; + } + case Bytecodes::_ladd: + case Bytecodes::_lsub: { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + break; + } + default: + ShouldNotReachHere(); + } +#endif // AARCH64 +} + + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem; + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Test if instr is commutative and if we should swap + if (x->is_commutative() && left.is_constant()) { + left_arg = &right; + right_arg = &left; + } + + if (is_div_rem) { + CodeEmitInfo* info = state_for(x); + if (x->op() == Bytecodes::_idiv && right_arg->is_constant() && is_power_of_2(right_arg->get_jint_constant())) { + left_arg->load_item(); + right_arg->dont_load_item(); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + LIR_Opr result = 
rlock_result(x); + __ idiv(left_arg->result(), right_arg->result(), result, tmp, info); + } else { +#ifdef AARCH64 + left_arg->load_item(); + right_arg->load_item(); + make_div_by_zero_check(right_arg->result(), T_INT, info); + if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), rlock_result(x), LIR_OprFact::illegalOpr, NULL); + } else { + // a % b is implemented with 2 instructions: + // tmp = a/b (sdiv) + // res = a - b*tmp (msub) + LIR_Opr tmp = FrameMap::as_opr(Rtemp); + __ irem(left_arg->result(), right_arg->result(), rlock_result(x), tmp, NULL); + } +#else + left_arg->load_item_force(FrameMap::R0_opr); + right_arg->load_item_force(FrameMap::R2_opr); + LIR_Opr tmp = FrameMap::R1_opr; + LIR_Opr result = rlock_result(x); + LIR_Opr out_reg; + if (x->op() == Bytecodes::_irem) { + out_reg = FrameMap::R0_opr; + __ irem(left_arg->result(), right_arg->result(), out_reg, tmp, info); + } else if (x->op() == Bytecodes::_idiv) { + out_reg = FrameMap::R1_opr; + __ idiv(left_arg->result(), right_arg->result(), out_reg, tmp, info); + } + __ move(out_reg, result); +#endif // AARCH64 + } + +#ifdef AARCH64 + } else if (((x->op() == Bytecodes::_iadd) || (x->op() == Bytecodes::_isub)) && right_arg->is_constant()) { + left_arg->load_item(); + jint c = right_arg->get_jint_constant(); + right_arg->dont_load_item(); + add_constant(left_arg->result(), (x->op() == Bytecodes::_iadd) ? 
c : -c, rlock_result(x)); +#endif // AARCH64 + + } else { + left_arg->load_item(); + if (x->op() == Bytecodes::_imul && right_arg->is_constant()) { + int c = right_arg->get_jint_constant(); + if (c > 0 && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + AARCH64_ONLY(assert(!right_arg->is_constant(), "constant right_arg is already handled by this moment");) + right_arg->load_nonconstant(); + } + rlock_result(x); + assert(right_arg->is_constant() || right_arg->is_register(), "wrong state of right"); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), NULL); + } +} + + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem value(x->x(), this); + LIRItem count(x->y(), this); + +#ifndef AARCH64 + if (value.type()->is_long()) { + count.set_destroys_register(); + } +#endif // !AARCH64 + + if (count.is_constant()) { + assert(count.type()->as_IntConstant() != NULL, "should be"); + count.dont_load_item(); + } else { + count.load_item(); + } + value.load_item(); + + LIR_Opr res = rlock_result(x); + shift_op(x->op(), res, value.result(), count.result(), LIR_OprFact::illegalOpr); +} + + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + +#ifdef AARCH64 + if (right.is_constant() && can_inline_as_constant_in_logic(right.value())) { + right.dont_load_item(); + } else { + right.load_item(); 
+ } +#else + right.load_nonconstant(); +#endif // AARCH64 + + logic_op(x->op(), rlock_result(x), left.result(), right.result()); +} + + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { +#ifdef __SOFTFP__ + address runtime_func; + switch (x->op()) { + case Bytecodes::_fcmpl: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl); + break; + case Bytecodes::_fcmpg: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg); + break; + case Bytecodes::_dcmpl: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl); + break; + case Bytecodes::_dcmpg: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg); + break; + case Bytecodes::_lcmp: { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_nonconstant(); + LIR_Opr reg = rlock_result(x); + __ lcmp2int(left.result(), right.result(), reg); + return; + } + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, x->type(), NULL); + set_result(x, result); +#else // __SOFTFP__ + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + +#ifdef AARCH64 + if (right.is_constant() && can_inline_as_constant_in_cmp(right.value())) { + right.dont_load_item(); + } else { + right.load_item(); + } +#else + right.load_nonconstant(); +#endif // AARCH64 + + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + ShouldNotReachHere(); + } +#endif // __SOFTFP__ +} + + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // 
offset of field + LIRItem cmp (x->argument_at(2), this); // value to compare with field + LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp + + LIR_Opr addr = new_pointer_register(); + LIR_Opr tmp1 = LIR_OprFact::illegalOpr; + LIR_Opr tmp2 = LIR_OprFact::illegalOpr; + + // get address of field + obj.load_item(); + offset.load_item(); + cmp.load_item(); + val.load_item(); + + __ add(obj.result(), offset.result(), addr); + LIR_Opr result = rlock_result(x); + + if (type == objectType) { +#if INCLUDE_ALL_GCS + // Do the pre-write barrier, if any. + pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); +#endif // INCLUDE_ALL_GCS +#ifdef AARCH64 + if (UseCompressedOops) { + tmp1 = new_pointer_register(); + tmp2 = new_pointer_register(); + } +#endif // AARCH64 + __ cas_obj(addr, cmp.result(), val.result(), tmp1, tmp2, result); + post_barrier(addr, val.result()); + } + else if (type == intType) { + __ cas_int(addr, cmp.result(), val.result(), tmp1, tmp1, result); + } + else if (type == longType) { +#ifndef AARCH64 + tmp1 = new_register(T_LONG); +#endif // !AARCH64 + __ cas_long(addr, cmp.result(), val.result(), tmp1, tmp2, result); + } + else { + ShouldNotReachHere(); + } +} + + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + address runtime_func; + switch (x->id()) { + case vmIntrinsics::_dabs: { +#ifdef __SOFTFP__ + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dabs); + break; +#else + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + __ abs(value.result(), rlock_result(x), LIR_OprFact::illegalOpr); + return; +#endif // __SOFTFP__ + } + case vmIntrinsics::_dsqrt: { +#ifdef __SOFTFP__ + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); + break; +#else + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + __ sqrt(value.result(), 
rlock_result(x), LIR_OprFact::illegalOpr); + return; +#endif // __SOFTFP__ + } + case vmIntrinsics::_dsin: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case vmIntrinsics::_dtan: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + case vmIntrinsics::_dpow: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + break; + default: + ShouldNotReachHere(); + return; + } + + LIR_Opr result; + if (x->number_of_arguments() == 1) { + result = call_runtime(x->argument_at(0), runtime_func, x->type(), NULL); + } else { + assert(x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow, "unexpected intrinsic"); + result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_func, x->type(), NULL); + } + set_result(x, result); +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + fatal("FMA intrinsic is not implemented on this platform"); +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + CodeEmitInfo* info = state_for(x, x->state()); + assert(x->number_of_arguments() == 5, "wrong type"); + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // We put arguments into the same registers which are used for a Java call. 
+ // Note: we used fixed registers for all arguments because all registers + // are caller-saved, so register allocator treats them all as used. + src.load_item_force (FrameMap::R0_oop_opr); + src_pos.load_item_force(FrameMap::R1_opr); + dst.load_item_force (FrameMap::R2_oop_opr); + dst_pos.load_item_force(FrameMap::R3_opr); + length.load_item_force (FrameMap::R4_opr); + LIR_Opr tmp = (FrameMap::R5_opr); + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), + tmp, expected_type, flags, info); +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + fatal("CRC32 intrinsic is not implemented on this platform"); +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + Unimplemented(); +} + +void LIRGenerator::do_Convert(Convert* x) { + address runtime_func; + switch (x->op()) { +#ifndef AARCH64 + case Bytecodes::_l2f: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); + break; + case Bytecodes::_l2d: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::l2d); + break; + case Bytecodes::_f2l: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + break; + case Bytecodes::_d2l: + runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + break; +#ifdef __SOFTFP__ + case Bytecodes::_f2d: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_f2d); + break; + case Bytecodes::_d2f: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_d2f); + break; + case Bytecodes::_i2f: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_i2f); + break; + case Bytecodes::_i2d: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_i2d); + break; + case Bytecodes::_f2i: + runtime_func = CAST_FROM_FN_PTR(address, __aeabi_f2iz); + break; + case Bytecodes::_d2i: + // This is implemented in hard float in assembler on arm but a call + // on other platforms. 
+ runtime_func = CAST_FROM_FN_PTR(address, SharedRuntime::d2i); + break; +#endif // __SOFTFP__ +#endif // !AARCH64 + default: { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr reg = rlock_result(x); + __ convert(x->op(), value.result(), reg, NULL); + return; + } + } + + LIR_Opr result = call_runtime(x->value(), runtime_func, x->type(), NULL); + set_result(x, result); +} + + +void LIRGenerator::do_NewInstance(NewInstance* x) { + print_if_not_loaded(x); + + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewInstanceStub::emit_code + LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewInstanceStub::emit_code + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = FrameMap::LR_oop_opr; + + new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, + LIR_OprFact::illegalOpr, klass_reg, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + // Evaluate state_for() first, because it can emit code + // with the same fixed registers that are used here (R1, R2) + CodeEmitInfo* info = state_for(x, x->state()); + LIRItem length(x->length(), this); + + length.load_item_force(FrameMap::R2_opr); // R2 is required by runtime call in NewTypeArrayStub::emit_code + LIR_Opr len = length.result(); + + LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewTypeArrayStub::emit_code + LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewTypeArrayStub::emit_code + + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = FrameMap::LR_oop_opr; + LIR_Opr tmp4 = LIR_OprFact::illegalOpr; + + BasicType elem_type = x->elt_type(); + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + 
CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + // Evaluate state_for() first, because it can emit code + // with the same fixed registers that are used here (R1, R2) + CodeEmitInfo* info = state_for(x, x->state()); + LIRItem length(x->length(), this); + + length.load_item_force(FrameMap::R2_opr); // R2 is required by runtime call in NewObjectArrayStub::emit_code + LIR_Opr len = length.result(); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + LIR_Opr reg = result_register_for(x->type()); // R0 is required by runtime call in NewObjectArrayStub::emit_code + LIR_Opr klass_reg = FrameMap::R1_metadata_opr; // R1 is required by runtime call in NewObjectArrayStub::emit_code + + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = FrameMap::LR_oop_opr; + LIR_Opr tmp4 = LIR_OprFact::illegalOpr; + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciMetadata* obj = ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Need to get the info before, as the items may become invalid 
through item_free + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + LIR_Opr sz = size->result(); + assert(sz->type() == T_INT, "should be"); + store_stack_parameter(sz, in_ByteSize(i * BytesPerInt)); + } + + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr klass_reg = FrameMap::R0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::R2_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::SP_opr; + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) { + patching_info = state_for(x, x->state_before()); + } + + obj.load_item(); + + CodeEmitInfo* info_for_exception = state_for(x); + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, + 
LIR_OprFact::illegalOpr, info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, + LIR_OprFact::illegalOpr, info_for_exception); + } + + LIR_Opr out_reg = rlock_result(x); + LIR_Opr tmp1 = FrameMap::R0_oop_opr; + LIR_Opr tmp2 = FrameMap::R1_oop_opr; + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + + __ checkcast(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, x->direct_compare(), + info_for_exception, patching_info, stub, x->profiled_method(), x->profiled_bci()); +} + + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + obj.load_item(); + LIR_Opr out_reg = rlock_result(x); + LIR_Opr tmp1 = FrameMap::R0_oop_opr; + LIR_Opr tmp2 = FrameMap::R1_oop_opr; + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + + __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + + +#ifdef __SOFTFP__ +// Turn operator if (f <op> g) into runtime call: +// call _aeabi_fcmp<op>(f, g) +// cmp(eq, 1) +// branch(eq, true path). +void LIRGenerator::do_soft_float_compare(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + If::Condition cond = x->cond(); + address runtime_func; + // unordered comparison gets the wrong answer because aeabi functions + // return false. + bool unordered_is_true = x->unordered_is_true(); + // reverse of condition for ne + bool compare_to_zero = false; + switch (lir_cond(cond)) { + case lir_cond_notEqual: + compare_to_zero = true; // fall through + case lir_cond_equal: + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, __aeabi_fcmpeq): + CAST_FROM_FN_PTR(address, __aeabi_dcmpeq); + break; + case lir_cond_less: + if (unordered_is_true) { + runtime_func = tag == floatTag ? 
+ CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmplt): + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmplt); + } else { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, __aeabi_fcmplt): + CAST_FROM_FN_PTR(address, __aeabi_dcmplt); + } + break; + case lir_cond_lessEqual: + if (unordered_is_true) { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmple): + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmple); + } else { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, __aeabi_fcmple): + CAST_FROM_FN_PTR(address, __aeabi_dcmple); + } + break; + case lir_cond_greaterEqual: + if (unordered_is_true) { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmpge): + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmpge); + } else { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, __aeabi_fcmpge): + CAST_FROM_FN_PTR(address, __aeabi_dcmpge); + } + break; + case lir_cond_greater: + if (unordered_is_true) { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_fcmpgt): + CAST_FROM_FN_PTR(address, SharedRuntime::unordered_dcmpgt); + } else { + runtime_func = tag == floatTag ? + CAST_FROM_FN_PTR(address, __aeabi_fcmpgt): + CAST_FROM_FN_PTR(address, __aeabi_dcmpgt); + } + break; + case lir_cond_aboveEqual: + case lir_cond_belowEqual: + ShouldNotReachHere(); // We're not going to get these. + default: + assert(lir_cond(cond) == lir_cond_always, "must be"); + ShouldNotReachHere(); + } + set_no_result(x); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + // Call float compare function, returns (1,0) if true or false. 
+ LIR_Opr result = call_runtime(x->x(), x->y(), runtime_func, intType, NULL); + __ cmp(lir_cond_equal, result, + compare_to_zero ? + LIR_OprFact::intConst(0) : LIR_OprFact::intConst(1)); + profile_branch(x, cond); + move_to_phi(x->state()); + __ branch(lir_cond_equal, T_INT, x->tsux()); +} +#endif // __SOFTFP__ + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + +#ifdef __SOFTFP__ + if (tag == floatTag || tag == doubleTag) { + do_soft_float_compare(x); + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); + return; + } +#endif // __SOFTFP__ + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + If::Condition cond = x->cond(); + +#ifndef AARCH64 + if (tag == longTag) { + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } +#endif // !AARCH64 + + xin->load_item(); + LIR_Opr left = xin->result(); + LIR_Opr right; + +#ifdef AARCH64 + if (yin->is_constant() && can_inline_as_constant_in_cmp(yin->value())) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + right = yin->result(); +#else + if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && + (cond == If::eql || cond == If::neq)) { + // inline long zero + right = LIR_OprFact::value_type(yin->value()->type()); + } else { + yin->load_nonconstant(); + right = yin->result(); + } +#endif // AARCH64 + + set_no_result(x); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + __ cmp(lir_cond(cond), left, right); + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + 
__ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::Rthread_opr; +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { + __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::R0_opr); + LIR_OprList* args = new LIR_OprList(1); + args->append(FrameMap::R0_opr); + address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry); + __ call_runtime_leaf(func, getThreadTemp(), LIR_OprFact::illegalOpr, args); +} + + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { +#ifndef AARCH64 + if (value->is_double_cpu()) { + assert(address->index()->is_illegal(), "should have a constant displacement"); + LIR_Opr tmp = new_pointer_register(); + add_large_constant(address->base(), address->disp(), tmp); + __ volatile_store_mem_reg(value, new LIR_Address(tmp, (intx)0, address->type()), info); + return; + } +#endif // !AARCH64 + // TODO-AARCH64 implement with stlr instruction + __ store(value, address, info, lir_patch_none); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { +#ifndef AARCH64 + if (result->is_double_cpu()) { + assert(address->index()->is_illegal(), "should have a constant displacement"); + LIR_Opr tmp = new_pointer_register(); + add_large_constant(address->base(), address->disp(), tmp); + __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, address->type()), result, info); + return; + } +#endif // !AARCH64 + // TODO-AARCH64 implement with ldar instruction + __ load(address, result, info, lir_patch_none); +} + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { +#ifdef AARCH64 + __ load(new LIR_Address(src, offset, type), dst); 
+#else + assert(offset->is_single_cpu(), "must be"); + if (is_volatile && dst->is_double_cpu()) { + LIR_Opr tmp = new_pointer_register(); + __ add(src, offset, tmp); + __ volatile_load_mem_reg(new LIR_Address(tmp, (intx)0, type), dst, NULL); + } else if (type == T_FLOAT || type == T_DOUBLE) { + // fld doesn't have indexed addressing mode + LIR_Opr tmp = new_register(T_INT); + __ add(src, offset, tmp); + __ load(new LIR_Address(tmp, (intx)0, type), dst); + } else { + __ load(new LIR_Address(src, offset, type), dst); + } +#endif // AARCH64 +} + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { +#ifdef AARCH64 + LIR_Address* addr = new LIR_Address(src, offset, type); + if (type == T_ARRAY || type == T_OBJECT) { + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +#else + assert(offset->is_single_cpu(), "must be"); + if (is_volatile && data->is_double_cpu()) { + LIR_Opr tmp = new_register(T_INT); + __ add(src, offset, tmp); + __ volatile_store_mem_reg(data, new LIR_Address(tmp, (intx)0, type), NULL); + } else if (type == T_FLOAT || type == T_DOUBLE) { + // fst doesn't have indexed addressing mode + LIR_Opr tmp = new_register(T_INT); + __ add(src, offset, tmp); + __ move(data, new LIR_Address(tmp, (intx)0, type)); + } else { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); +#if INCLUDE_ALL_GCS + if (is_obj) { + // Do the pre-write barrier, if any. 
+ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } +#endif // INCLUDE_ALL_GCS + __ move(data, addr); + if (is_obj) { + assert(src->is_register(), "must be register"); + post_barrier(LIR_OprFact::address(addr), data); + } + } +#endif // AARCH64 +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + if (x->is_add()) { + value.load_nonconstant(); + } else { + value.load_item(); + } + off.load_nonconstant(); + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + + assert (type == T_INT || type == T_LONG || (!x->is_add() && is_obj), "unexpected type"); + LIR_Opr addr_ptr = new_pointer_register(); + + __ add(src.result(), off.result(), addr_ptr); + + LIR_Address* addr = new LIR_Address(addr_ptr, (intx)0, type); + + if (x->is_add()) { + LIR_Opr tmp = new_register(type); + __ xadd(addr_ptr, data, dst, tmp); + } else { + LIR_Opr tmp = (UseCompressedOops && is_obj) ? new_pointer_register() : LIR_OprFact::illegalOpr; + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(addr_ptr, data, dst, tmp); + if (is_obj) { + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } + } +} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LIRGenerator_arm.hpp 2016-12-02 11:18:42.490911053 -0500 @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + + // Helper to set the card at the given address to the given value. + void set_card(LIR_Opr value, LIR_Address* card_addr); + + void make_div_by_zero_check(LIR_Opr right_arg, BasicType type, CodeEmitInfo* info); + +#ifdef AARCH64 + // the helper for arithmetic + void add_constant(LIR_Opr src, jlong c, LIR_Opr dest); +#endif // AARCH64 --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LIR_arm.cpp 2016-12-02 11:18:48.171233184 -0500 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_LIR.hpp" + +FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +#ifdef AARCH64 +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg1 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} +#else +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) != fnoreg, "Arm32 holds double in two regs."); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg2 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} +#endif + +#ifndef PRODUCT +void LIR_Address::verify() const { +#ifdef _LP64 + assert(base()->is_cpu_register(), "wrong base operand"); +#endif +#ifdef AARCH64 + if (base()->type() == T_INT) { + assert(index()->is_single_cpu() && (index()->type() == T_INT), "wrong index operand"); + } else { + assert(index()->is_illegal() || index()->is_double_cpu() || + (index()->is_single_cpu() && (index()->is_oop_register() || index()->type() == T_INT)), "wrong index operand"); + assert(base()->type() == T_OBJECT || 
base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); + } +#else + assert(disp() == 0 || index()->is_illegal(), "can't have both"); + // Note: offsets higher than 4096 must not be rejected here. They can + // be handled by the back-end or will be rejected if not. +#ifdef _LP64 + assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); + assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); +#else + assert(base()->is_single_cpu(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); + assert(base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA, + "wrong type for addresses"); +#endif +#endif // AARCH64 +} +#endif // PRODUCT --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LinearScan_arm.cpp 2016-12-02 11:18:53.807552817 -0500 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on ARM +} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_LinearScan_arm.hpp 2016-12-02 11:18:59.259862017 -0500 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP +#define CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num < pd_nof_cpu_regs_processed_in_linearscan || + reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { +#ifndef AARCH64 + if (type == T_LONG || type == T_DOUBLE) return 2; +#endif // !AARCH64 + return 1; +} + + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { +#ifdef AARCH64 + return false; +#else + return type == T_DOUBLE || type == T_LONG; +#endif // AARCH64 +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + // TODO-AARCH64 try to add callee-saved registers + return true; +} + + +inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No extra temporaries on ARM +} + + +// Implementation of LinearScanWalker + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { +#ifndef __SOFTFP__ + if (cur->type() == T_FLOAT || cur->type() == T_DOUBLE) { + _first_reg = pd_first_fpu_reg; + _last_reg = pd_first_fpu_reg + pd_nof_fpu_regs_reg_alloc - 1; + return true; + } +#endif // !__SOFTFP__ + + // Use allocatable CPU registers otherwise + _first_reg = pd_first_cpu_reg; + _last_reg = pd_first_cpu_reg + FrameMap::adjust_reg_range(pd_nof_cpu_regs_reg_alloc) - 1; + return true; +} + +#endif // CPU_ARM_VM_C1_LINEARSCAN_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_MacroAssembler_arm.cpp 2016-12-02 11:19:05.072191636 -0500 @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +// Note: Rtemp usage in this file should not impact C2 and should be +// correct as long as it is not implicitly used in lower layers (the +// arm [macro]assembler) and used with care in the other C1 specific +// files. 
+ +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { + Label verified; + load_klass(Rtemp, receiver); + cmp(Rtemp, iCache); + b(verified, eq); // jump over alignment no-ops +#ifdef AARCH64 + jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp); +#else + jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); +#endif + align(CodeEntryAlignment); + bind(verified); +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + assert((frame_size_in_bytes % StackAlignmentInBytes) == 0, "frame size should be aligned"); + +#ifdef AARCH64 + // Extra nop for MT-safe patching in NativeJump::patch_verified_entry + nop(); +#endif // AARCH64 + + arm_stack_overflow_check(bang_size_in_bytes, Rtemp); + + // FP can no longer be used to memorize SP. It may be modified + // if this method contains a methodHandle call site + raw_push(FP, LR); + sub_slow(SP, SP, frame_size_in_bytes); +} + +void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { + add_slow(SP, SP, frame_size_in_bytes); + raw_pop(FP, LR); +} + +void C1_MacroAssembler::verified_entry() { + if (C1Breakpoint) { + breakpoint(); + } +} + +// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
+void C1_MacroAssembler::try_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, + RegisterOrConstant size_expression, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, obj_end, tmp1, size_expression, slow_case); + } else { + eden_allocate(obj, obj_end, tmp1, tmp2, size_expression, slow_case); + incr_allocated_bytes(size_expression, tmp1); + } +} + + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp) { + assert_different_registers(obj, klass, len, tmp); + + if(UseBiasedLocking && !len->is_valid()) { + ldr(tmp, Address(klass, Klass::prototype_header_offset())); + } else { + mov(tmp, (intptr_t)markOopDesc::prototype()); + } + +#ifdef AARCH64 + if (UseCompressedClassPointers) { + str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + encode_klass_not_null(tmp, klass); // Take care not to kill klass + str_w(tmp, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + assert(oopDesc::mark_offset_in_bytes() + wordSize == oopDesc::klass_offset_in_bytes(), "adjust this code"); + stp(tmp, klass, Address(obj, oopDesc::mark_offset_in_bytes())); + } +#else + str(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); +#endif // AARCH64 + + if (len->is_valid()) { + str_32(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } +#ifdef AARCH64 + else if (UseCompressedClassPointers) { + store_klass_gap(obj); + } +#endif // AARCH64 +} + + +// Cleans object body [base..obj_end]. Clobbers `base` and `tmp` registers. 
+void C1_MacroAssembler::initialize_body(Register base, Register obj_end, Register tmp) { + zero_memory(base, obj_end, tmp); +} + + +void C1_MacroAssembler::initialize_object(Register obj, Register obj_end, Register klass, + Register len, Register tmp1, Register tmp2, + RegisterOrConstant header_size, int obj_size_in_bytes, + bool is_tlab_allocated) +{ + assert_different_registers(obj, obj_end, klass, len, tmp1, tmp2); + initialize_header(obj, klass, len, tmp1); + + const Register ptr = tmp2; + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { +#ifdef AARCH64 + if (obj_size_in_bytes < 0) { + add_rc(ptr, obj, header_size); + initialize_body(ptr, obj_end, tmp1); + + } else { + int base = instanceOopDesc::header_size() * HeapWordSize; + assert(obj_size_in_bytes >= base, "should be"); + + const int zero_bytes = obj_size_in_bytes - base; + assert((zero_bytes % wordSize) == 0, "should be"); + + if ((zero_bytes % (2*wordSize)) != 0) { + str(ZR, Address(obj, base)); + base += wordSize; + } + + const int stp_count = zero_bytes / (2*wordSize); + + if (zero_bytes > 8 * wordSize) { + Label loop; + add(ptr, obj, base); + mov(tmp1, stp_count); + bind(loop); + subs(tmp1, tmp1, 1); + stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); + b(loop, gt); + } else { + for (int i = 0; i < stp_count; i++) { + stp(ZR, ZR, Address(obj, base + i * 2 * wordSize)); + } + } + } +#else + if (obj_size_in_bytes >= 0 && obj_size_in_bytes <= 8 * BytesPerWord) { + mov(tmp1, 0); + const int base = instanceOopDesc::header_size() * HeapWordSize; + for (int i = base; i < obj_size_in_bytes; i += wordSize) { + str(tmp1, Address(obj, i)); + } + } else { + assert(header_size.is_constant() || header_size.as_register() == ptr, "code assumption"); + add(ptr, obj, header_size); + initialize_body(ptr, obj_end, tmp1); + } +#endif // AARCH64 + } + + // StoreStore barrier required after complete initialization + // (headers + content zeroing), before the object may escape. 
+ membar(MacroAssembler::StoreStore, tmp1); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, Register tmp3, + int header_size, int object_size, + Register klass, Label& slow_case) { + assert_different_registers(obj, tmp1, tmp2, tmp3, klass, Rtemp); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + const int object_size_in_bytes = object_size * BytesPerWord; + + const Register obj_end = tmp1; + const Register len = noreg; + + if (Assembler::is_arith_imm_in_range(object_size_in_bytes)) { + try_allocate(obj, obj_end, tmp2, tmp3, object_size_in_bytes, slow_case); + } else { + // Rtemp should be free at c1 LIR level + mov_slow(Rtemp, object_size_in_bytes); + try_allocate(obj, obj_end, tmp2, tmp3, Rtemp, slow_case); + } + initialize_object(obj, obj_end, klass, len, tmp2, tmp3, instanceOopDesc::header_size() * HeapWordSize, object_size_in_bytes, /* is_tlab_allocated */ UseTLAB); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, + Register tmp1, Register tmp2, Register tmp3, + int header_size, int element_size, + Register klass, Label& slow_case) { + assert_different_registers(obj, len, tmp1, tmp2, tmp3, klass, Rtemp); + const int header_size_in_bytes = header_size * BytesPerWord; + const int scale_shift = exact_log2(element_size); + const Register obj_size = Rtemp; // Rtemp should be free at c1 LIR level + +#ifdef AARCH64 + mov_slow(Rtemp, max_array_allocation_length); + cmp_32(len, Rtemp); +#else + cmp_32(len, max_array_allocation_length); +#endif // AARCH64 + b(slow_case, hs); + + bool align_header = ((header_size_in_bytes | element_size) & MinObjAlignmentInBytesMask) != 0; + assert(align_header || ((header_size_in_bytes & MinObjAlignmentInBytesMask) == 0), "must be"); + assert(align_header || ((element_size & MinObjAlignmentInBytesMask) == 0), "must be"); + + mov(obj_size, header_size_in_bytes + (align_header ? 
(MinObjAlignmentInBytes - 1) : 0)); + add_ptr_scaled_int32(obj_size, obj_size, len, scale_shift); + + if (align_header) { + align_reg(obj_size, obj_size, MinObjAlignmentInBytes); + } + + try_allocate(obj, tmp1, tmp2, tmp3, obj_size, slow_case); + initialize_object(obj, tmp1, klass, len, tmp2, tmp3, header_size_in_bytes, -1, /* is_tlab_allocated */ UseTLAB); +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, + Register disp_hdr, Register tmp1, + Label& slow_case) { + Label done, fast_lock, fast_lock_done; + int null_check_offset = 0; + + const Register tmp2 = Rtemp; // Rtemp should be free at c1 LIR level + assert_different_registers(hdr, obj, disp_hdr, tmp1, tmp2); + + assert(BasicObjectLock::lock_offset_in_bytes() == 0, "ajust this code"); + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int mark_offset = BasicLock::displaced_header_offset_in_bytes(); + + if (UseBiasedLocking) { + // load object + str(obj, Address(disp_hdr, obj_offset)); + null_check_offset = biased_locking_enter(obj, hdr/*scratched*/, tmp1, false, tmp2, done, slow_case); + } + + assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); + +#ifdef AARCH64 + + str(obj, Address(disp_hdr, obj_offset)); + + if (!UseBiasedLocking) { + null_check_offset = offset(); + } + ldr(hdr, obj); + + // Test if object is already locked + assert(markOopDesc::unlocked_value == 1, "adjust this code"); + tbnz(hdr, exact_log2(markOopDesc::unlocked_value), fast_lock); + + // Check for recursive locking + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. 
+ intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); + Assembler::LogicalImmediate imm(mask, false); + mov(tmp2, SP); + sub(tmp2, hdr, tmp2); + ands(tmp2, tmp2, imm); + b(slow_case, ne); + + // Recursive locking: store 0 into a lock record + str(ZR, Address(disp_hdr, mark_offset)); + b(fast_lock_done); + +#else // AARCH64 + + if (!UseBiasedLocking) { + null_check_offset = offset(); + } + + // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. + // That would be acceptable as either CAS or slow case path is taken in that case. + + // Must be the first instruction here, because implicit null check relies on it + ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + + str(obj, Address(disp_hdr, obj_offset)); + tst(hdr, markOopDesc::unlocked_value); + b(fast_lock, ne); + + // Check for recursive locking + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. + // -1- test low 2 bits + movs(tmp2, AsmOperand(hdr, lsl, 30)); + // -2- test (hdr - SP) if the low two bits are 0 + sub(tmp2, hdr, SP, eq); + movs(tmp2, AsmOperand(tmp2, lsr, exact_log2(os::vm_page_size())), eq); + // If 'eq' then OK for recursive fast locking: store 0 into a lock record. 
+ str(tmp2, Address(disp_hdr, mark_offset), eq); + b(fast_lock_done, eq); + // else need slow case + b(slow_case); + +#endif // AARCH64 + + bind(fast_lock); + // Save previous object header in BasicLock structure and update the header + str(hdr, Address(disp_hdr, mark_offset)); + + cas_for_lock_acquire(hdr, disp_hdr, obj, tmp2, slow_case); + + bind(fast_lock_done); + +#ifndef PRODUCT + if (PrintBiasedLockingStatistics) { + cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr()); + } +#endif // !PRODUCT + + bind(done); + + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, + Register disp_hdr, Register tmp, + Label& slow_case) { + // Note: this method is not using its 'tmp' argument + + assert_different_registers(hdr, obj, disp_hdr, Rtemp); + Register tmp2 = Rtemp; + + assert(BasicObjectLock::lock_offset_in_bytes() == 0, "ajust this code"); + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int mark_offset = BasicLock::displaced_header_offset_in_bytes(); + + Label done; + if (UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, obj_offset)); + biased_locking_exit(obj, hdr, done); + } + + assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); + Label retry; + + // Load displaced header and object from the lock + ldr(hdr, Address(disp_hdr, mark_offset)); + // If hdr is NULL, we've got recursive locking and there's nothing more to do + cbz(hdr, done); + + if(!UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, obj_offset)); + } + + // Restore the object header + cas_for_lock_release(disp_hdr, hdr, obj, tmp2, slow_case); + + bind(done); +} + + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(SP, stack_offset)); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + Label not_null; + cbnz(r, not_null); + stop("non-null oop required"); + 
bind(not_null); + if (!VerifyOops) return; + verify_oop(r); +} + +#endif // !PRODUCT --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_MacroAssembler_arm.hpp 2016-12-02 11:19:10.756513990 -0500 @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP +#define CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP + + private: + + void pd_init() { /* not used */ } + + public: + + // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. + // `size_expression` should be a register or constant which can be used as immediate in "add" instruction. + void try_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, + RegisterOrConstant size_expression, Label& slow_case); + + void initialize_header(Register obj, Register klass, Register len, Register tmp); + + // Cleans object body [base..obj_end]. 
Clobbers `base` and `tmp` registers. + void initialize_body(Register base, Register obj_end, Register tmp); + + void initialize_object(Register obj, Register obj_end, Register klass, + Register len, Register tmp1, Register tmp2, + RegisterOrConstant header_size_expression, int obj_size_in_bytes, + bool is_tlab_allocated); + + void allocate_object(Register obj, Register tmp1, Register tmp2, Register tmp3, + int header_size, int object_size, + Register klass, Label& slow_case); + + void allocate_array(Register obj, Register len, + Register tmp1, Register tmp2, Register tmp3, + int header_size, int element_size, + Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x01000000 + }; + + int lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + + void unlock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + + // This platform only uses signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + +#endif // CPU_ARM_VM_C1_MACROASSEMBLER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_Runtime1_arm.cpp 2016-12-02 11:19:16.700851094 -0500 @@ -0,0 +1,1230 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_arm.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_arm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_arm.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#endif + +// Note: Rtemp usage in this file should not impact C2 and should be +// correct as long as it is not implicitly used in lower layers (the +// arm [macro]assembler) and used with care in the other C1 specific +// files. 
+ +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + mov(R0, Rthread); + + int call_offset = set_last_Java_frame(SP, FP, false, Rtemp); + + call(entry); + if (call_offset == -1) { // PC not saved + call_offset = offset(); + } + reset_last_Java_frame(Rtemp); + + assert(frame_size() != no_frame_size, "frame must be fixed"); + if (_stub_id != Runtime1::forward_exception_id) { + ldr(R3, Address(Rthread, Thread::pending_exception_offset())); + } + + if (oop_result1->is_valid()) { + assert_different_registers(oop_result1, R3, Rtemp); + get_vm_result(oop_result1, Rtemp); + } + if (metadata_result->is_valid()) { + assert_different_registers(metadata_result, R3, Rtemp); + get_vm_result_2(metadata_result, Rtemp); + } + + // Check for pending exception + // unpack_with_exception_in_tls path is taken through + // Runtime1::exception_handler_for_pc + if (_stub_id != Runtime1::forward_exception_id) { + assert(frame_size() != no_frame_size, "cannot directly call forward_exception_id"); +#ifdef AARCH64 + Label skip; + cbz(R3, skip); + jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp); + bind(skip); +#else + cmp(R3, 0); + jump(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type, Rtemp, ne); +#endif // AARCH64 + } else { +#ifdef ASSERT + // Should not have pending exception in forward_exception stub + ldr(R3, Address(Rthread, Thread::pending_exception_offset())); + cmp(R3, 0); + breakpoint(ne); +#endif // ASSERT + } + return call_offset; +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { + if (arg1 != R1) { + mov(R1, arg1); + } + return call_RT(oop_result1, metadata_result, entry, 1); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { + assert(arg1 == R1 && arg2 == R2, 
"cannot handle otherwise"); + return call_RT(oop_result1, metadata_result, entry, 2); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + assert(arg1 == R1 && arg2 == R2 && arg3 == R3, "cannot handle otherwise"); + return call_RT(oop_result1, metadata_result, entry, 3); +} + + +#define __ sasm-> + +// TODO: ARM - does this duplicate RegisterSaver in SharedRuntime? +#ifdef AARCH64 + + // + // On AArch64 registers save area has the following layout: + // + // |---------------------| + // | return address (LR) | + // | FP | + // |---------------------| + // | D31 | + // | ... | + // | D0 | + // |---------------------| + // | padding | + // |---------------------| + // | R28 | + // | ... | + // | R0 | + // |---------------------| <-- SP + // + +enum RegisterLayout { + number_of_saved_gprs = 29, + number_of_saved_fprs = FloatRegisterImpl::number_of_registers, + + R0_offset = 0, + D0_offset = R0_offset + number_of_saved_gprs + 1, + FP_offset = D0_offset + number_of_saved_fprs, + LR_offset = FP_offset + 1, + + reg_save_size = LR_offset + 1, + + arg1_offset = reg_save_size * wordSize, + arg2_offset = (reg_save_size + 1) * wordSize +}; + +#else + +enum RegisterLayout { + fpu_save_size = pd_nof_fpu_regs_reg_alloc, +#ifndef __SOFTFP__ + D0_offset = 0, +#endif + R0_offset = fpu_save_size, + R1_offset, + R2_offset, + R3_offset, + R4_offset, + R5_offset, + R6_offset, +#if (FP_REG_NUM != 7) + R7_offset, +#endif + R8_offset, + R9_offset, + R10_offset, +#if (FP_REG_NUM != 11) + R11_offset, +#endif + R12_offset, + FP_offset, + LR_offset, + reg_save_size, + arg1_offset = reg_save_size * wordSize, + arg2_offset = (reg_save_size + 1) * wordSize +}; + +#endif // AARCH64 + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) { + sasm->set_frame_size(reg_save_size /* in words */); + + // Record saved value locations in an OopMap. 
+ // Locations are offsets from sp after runtime call. + OopMap* map = new OopMap(VMRegImpl::slots_per_word * reg_save_size, 0); + +#ifdef AARCH64 + for (int i = 0; i < number_of_saved_gprs; i++) { + map->set_callee_saved(VMRegImpl::stack2reg((R0_offset + i) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg()); + } + map->set_callee_saved(VMRegImpl::stack2reg(FP_offset * VMRegImpl::slots_per_word), FP->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(LR_offset * VMRegImpl::slots_per_word), LR->as_VMReg()); + + if (save_fpu_registers) { + for (int i = 0; i < number_of_saved_fprs; i++) { + map->set_callee_saved(VMRegImpl::stack2reg((D0_offset + i) * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg()); + } + } +#else + int j=0; + for (int i = R0_offset; i < R10_offset; i++) { + if (j == FP_REG_NUM) { + // skip the FP register, saved below + j++; + } + map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg()); + j++; + } + assert(j == R10->encoding(), "must be"); +#if (FP_REG_NUM != 11) + // add R11, if not saved as FP + map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg()); +#endif + map->set_callee_saved(VMRegImpl::stack2reg(FP_offset), FP->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(LR_offset), LR->as_VMReg()); + + if (save_fpu_registers) { + for (int i = 0; i < fpu_save_size; i++) { + map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg()); + } + } +#endif // AARCH64 + + return map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = HaveVFP) { + __ block_comment("save_live_registers"); + sasm->set_frame_size(reg_save_size /* in words */); + +#ifdef AARCH64 + assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned"); + + __ raw_push(FP, LR); + + __ sub(SP, SP, (reg_save_size - 2) * wordSize); + + for (int i = 0; i < round_down(number_of_saved_gprs, 2); i += 2) { + __ stp(as_Register(i), as_Register(i+1), 
Address(SP, (R0_offset + i) * wordSize)); + } + + if (is_odd(number_of_saved_gprs)) { + int i = number_of_saved_gprs - 1; + __ str(as_Register(i), Address(SP, (R0_offset + i) * wordSize)); + } + + if (save_fpu_registers) { + assert (is_even(number_of_saved_fprs), "adjust this code"); + for (int i = 0; i < number_of_saved_fprs; i += 2) { + __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize)); + } + } +#else + __ push(RegisterSet(FP) | RegisterSet(LR)); + __ push(RegisterSet(R0, R6) | RegisterSet(R8, R10) | R12 | altFP_7_11); + if (save_fpu_registers) { + __ fstmdbd(SP, FloatRegisterSet(D0, fpu_save_size / 2), writeback); + } else { + __ sub(SP, SP, fpu_save_size * wordSize); + } +#endif // AARCH64 + + return generate_oop_map(sasm, save_fpu_registers); +} + + +static void restore_live_registers(StubAssembler* sasm, + bool restore_R0, + bool restore_FP_LR, + bool do_return, + bool restore_fpu_registers = HaveVFP) { + __ block_comment("restore_live_registers"); + +#ifdef AARCH64 + if (restore_R0) { + __ ldr(R0, Address(SP, R0_offset * wordSize)); + } + + assert(is_odd(number_of_saved_gprs), "adjust this code"); + for (int i = 1; i < number_of_saved_gprs; i += 2) { + __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize)); + } + + if (restore_fpu_registers) { + assert (is_even(number_of_saved_fprs), "adjust this code"); + for (int i = 0; i < number_of_saved_fprs; i += 2) { + __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), Address(SP, (D0_offset + i) * wordSize)); + } + } + + __ add(SP, SP, (reg_save_size - 2) * wordSize); + + if (restore_FP_LR) { + __ raw_pop(FP, LR); + if (do_return) { + __ ret(); + } + } else { + assert (!do_return, "return without restoring FP/LR"); + } +#else + if (restore_fpu_registers) { + __ fldmiad(SP, FloatRegisterSet(D0, fpu_save_size / 2), writeback); + if (!restore_R0) { + __ add(SP, SP, (R1_offset - fpu_save_size) * wordSize); + } + } else { + __ add(SP, SP, (restore_R0 
? fpu_save_size : R1_offset) * wordSize); + } + __ pop(RegisterSet((restore_R0 ? R0 : R1), R6) | RegisterSet(R8, R10) | R12 | altFP_7_11); + if (restore_FP_LR) { + __ pop(RegisterSet(FP) | RegisterSet(do_return ? PC : LR)); + } else { + assert (!do_return, "return without restoring FP/LR"); + } +#endif // AARCH64 +} + + +static void restore_live_registers_except_R0(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { + restore_live_registers(sasm, false, true, true, restore_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { + restore_live_registers(sasm, true, true, true, restore_fpu_registers); +} + +#ifndef AARCH64 +static void restore_live_registers_except_FP_LR(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { + restore_live_registers(sasm, true, false, false, restore_fpu_registers); +} +#endif // !AARCH64 + +static void restore_live_registers_without_return(StubAssembler* sasm, bool restore_fpu_registers = HaveVFP) { + restore_live_registers(sasm, true, true, false, restore_fpu_registers); +} + + +void Runtime1::initialize_pd() { + LIR_Assembler::exception_handler_size = AARCH64_ONLY(256) NOT_AARCH64(68); +#ifndef PRODUCT + if (VerifyOops) LIR_Assembler::exception_handler_size += AARCH64_ONLY(216) NOT_AARCH64(60); +#endif // !PRODUCT +} + + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + OopMap* oop_map = save_live_registers(sasm); + + if (has_argument) { + __ ldr(R1, Address(SP, arg1_offset)); + } + + int call_offset = __ call_RT(noreg, noreg, target); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + DEBUG_ONLY(STOP("generate_exception_throw");) // Should not reach here + return oop_maps; +} + + +static void restore_sp_for_method_handle(StubAssembler* sasm) { + // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site. 
+ __ ldr_s32(Rtemp, Address(Rthread, JavaThread::is_method_handle_return_offset())); +#ifdef AARCH64 + Label skip; + __ cbz(Rtemp, skip); + __ mov(SP, Rmh_SP_save); + __ bind(skip); +#else + __ cmp(Rtemp, 0); + __ mov(SP, Rmh_SP_save, ne); +#endif // AARCH64 +} + + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler* sasm) { + __ block_comment("generate_handle_exception"); + + bool save_fpu_registers = false; + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + + switch (id) { + case forward_exception_id: { + save_fpu_registers = HaveVFP; + oop_map = generate_oop_map(sasm); + __ ldr(Rexception_obj, Address(Rthread, Thread::pending_exception_offset())); + __ ldr(Rexception_pc, Address(SP, LR_offset * wordSize)); + Register zero = __ zero_register(Rtemp); + __ str(zero, Address(Rthread, Thread::pending_exception_offset())); + break; + } + case handle_exception_id: + save_fpu_registers = HaveVFP; + // fall-through + case handle_exception_nofpu_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, save_fpu_registers); + break; + case handle_exception_from_callee_id: + // At this point all registers except exception oop (R4/R19) and + // exception pc (R5/R20) are dead. 
+ oop_map = save_live_registers(sasm); // TODO it's not required to save all registers + break; + default: ShouldNotReachHere(); + } + + __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset())); + __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset())); + + __ str(Rexception_pc, Address(SP, LR_offset * wordSize)); // patch throwing pc into return address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // Exception handler found + __ str(R0, Address(SP, LR_offset * wordSize)); // patch the return address + + // Restore the registers that were saved at the beginning, remove + // frame and jump to the exception handler. + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + restore_live_registers(sasm, save_fpu_registers); + // Note: the restore live registers includes the jump to LR (patched to R0) + break; + case handle_exception_from_callee_id: + restore_live_registers_without_return(sasm); // must not jump immediately to handler + restore_sp_for_method_handle(sasm); + __ ret(); + break; + default: ShouldNotReachHere(); + } + + DEBUG_ONLY(STOP("generate_handle_exception");) // Should not reach here + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler* sasm) { + // FP no longer used to find the frame start + // on entry, remove_frame() has already been called (restoring FP and LR) + + // search the exception handler address of the caller (using the return address) + __ mov(c_rarg0, Rthread); + __ mov(Rexception_pc, LR); + __ mov(c_rarg1, LR); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), c_rarg0, c_rarg1); + + // Exception oop should be still in Rexception_obj and pc in Rexception_pc + // Jump to handler + __ verify_not_null_oop(Rexception_obj); + + // JSR292 extension + 
restore_sp_for_method_handle(sasm); + + __ jump(R0); +} + + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + OopMap* oop_map = save_live_registers(sasm); + + // call the runtime patching routine, returns non-zero if nmethod got deopted. + int call_offset = __ call_RT(noreg, noreg, target); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ cmp_32(R0, 0); + +#ifdef AARCH64 + Label call_deopt; + + restore_live_registers_without_return(sasm); + __ b(call_deopt, ne); + __ ret(); + + __ bind(call_deopt); +#else + restore_live_registers_except_FP_LR(sasm); + __ pop(RegisterSet(FP) | RegisterSet(PC), eq); + + // Deoptimization needed + // TODO: ARM - no need to restore FP & LR because unpack_with_reexecution() stores them back + __ pop(RegisterSet(FP) | RegisterSet(LR)); +#endif // AARCH64 + + __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp); + + DEBUG_ONLY(STOP("generate_patching");) // Should not reach here + return oop_maps; +} + + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + OopMapSet* oop_maps = NULL; + bool save_fpu_registers = HaveVFP; + + switch (id) { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + // does not return on ARM + } + break; + +#if INCLUDE_ALL_GCS + case g1_pre_barrier_slow_id: + { + // Input: + // - pre_val pushed on the stack + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + // save at least the registers that need saving if the runtime is called +#ifdef AARCH64 + __ raw_push(R0, R1); + __ raw_push(R2, R3); + const int nb_saved_regs = 4; +#else // AARCH64 + const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | 
RegisterSet(LR); + const int nb_saved_regs = 6; + assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs"); + __ push(saved_regs); +#endif // AARCH64 + + const Register r_pre_val_0 = R0; // must be R0, to be ready for the runtime call + const Register r_index_1 = R1; + const Register r_buffer_2 = R2; + + Address queue_index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_index())); + Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + __ ldr(r_index_1, queue_index); + __ ldr(r_pre_val_0, Address(SP, nb_saved_regs*wordSize)); + __ ldr(r_buffer_2, buffer); + + __ subs(r_index_1, r_index_1, wordSize); + __ b(runtime, lt); + + __ str(r_index_1, queue_index); + __ str(r_pre_val_0, Address(r_buffer_2, r_index_1)); + + __ bind(done); + +#ifdef AARCH64 + __ raw_pop(R2, R3); + __ raw_pop(R0, R1); +#else // AARCH64 + __ pop(saved_regs); +#endif // AARCH64 + + __ ret(); + + __ bind(runtime); + + save_live_registers(sasm); + + assert(r_pre_val_0 == c_rarg0, "pre_val should be in R0"); + __ mov(c_rarg1, Rthread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, c_rarg1); + + restore_live_registers_without_return(sasm); + + __ b(done); + } + break; + case g1_post_barrier_slow_id: + { + // Input: + // - store_addr, pushed on the stack + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = barrier_set_cast(bs); + Label done; + Label recheck; + Label runtime; + + Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_index())); + Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_buf())); + + AddressLiteral cardtable((address)ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this 
code"); + + // save at least the registers that need saving if the runtime is called +#ifdef AARCH64 + __ raw_push(R0, R1); + __ raw_push(R2, R3); + const int nb_saved_regs = 4; +#else // AARCH64 + const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR); + const int nb_saved_regs = 6; + assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs"); + __ push(saved_regs); +#endif // AARCH64 + + const Register r_card_addr_0 = R0; // must be R0 for the slow case + const Register r_obj_0 = R0; + const Register r_card_base_1 = R1; + const Register r_tmp2 = R2; + const Register r_index_2 = R2; + const Register r_buffer_3 = R3; + const Register tmp1 = Rtemp; + + __ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize)); + // Note: there is a comment in x86 code about not using + // ExternalAddress / lea, due to relocation not working + // properly for that address. Should be OK for arm, where we + // explicitly specify that 'cardtable' has a relocInfo::none + // type. + __ lea(r_card_base_1, cardtable); + __ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTableModRefBS::card_shift)); + + // first quick check without barrier + __ ldrb(r_tmp2, Address(r_card_addr_0)); + + __ cmp(r_tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ b(recheck, ne); + + __ bind(done); + +#ifdef AARCH64 + __ raw_pop(R2, R3); + __ raw_pop(R0, R1); +#else // AARCH64 + __ pop(saved_regs); +#endif // AARCH64 + + __ ret(); + + __ bind(recheck); + + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1); + + // reload card state after the barrier that ensures the stored oop was visible + __ ldrb(r_tmp2, Address(r_card_addr_0)); + + assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code"); + __ cbz(r_tmp2, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
+ + assert(0 == (int)CardTableModRefBS::dirty_card_val(), "adjust this code"); + if (((intptr_t)ct->byte_map_base & 0xff) == 0) { + // Card table is aligned so the lowest byte of the table address base is zero. + __ strb(r_card_base_1, Address(r_card_addr_0)); + } else { + __ strb(__ zero_register(r_tmp2), Address(r_card_addr_0)); + } + + __ ldr(r_index_2, queue_index); + __ ldr(r_buffer_3, buffer); + + __ subs(r_index_2, r_index_2, wordSize); + __ b(runtime, lt); // go to runtime if now negative + + __ str(r_index_2, queue_index); + + __ str(r_card_addr_0, Address(r_buffer_3, r_index_2)); + + __ b(done); + + __ bind(runtime); + + save_live_registers(sasm); + + assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0"); + __ mov(c_rarg1, Rthread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), c_rarg0, c_rarg1); + + restore_live_registers_without_return(sasm); + + __ b(done); + } + break; +#endif // INCLUDE_ALL_GCS + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + const Register result = R0; + const Register klass = R1; + + if (UseTLAB && FastTLABRefill && id != new_instance_id) { + // We come here when TLAB allocation failed. + // In this case we either refill TLAB or allocate directly from eden. 
+ Label retry_tlab, try_eden, slow_case, slow_case_no_pop; + + // Make sure the class is fully initialized + if (id == fast_new_instance_init_check_id) { + __ ldrb(result, Address(klass, InstanceKlass::init_state_offset())); + __ cmp(result, InstanceKlass::fully_initialized); + __ b(slow_case_no_pop, ne); + } + + // Free some temporary registers + const Register obj_size = R4; + const Register tmp1 = R5; + const Register tmp2 = LR; + const Register obj_end = Rtemp; + + __ raw_push(R4, R5, LR); + + __ tlab_refill(result, obj_size, tmp1, tmp2, obj_end, try_eden, slow_case); + + __ bind(retry_tlab); + __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset())); + __ tlab_allocate(result, obj_end, tmp1, obj_size, slow_case); // initializes result and obj_end + __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2, + instanceOopDesc::header_size() * HeapWordSize, -1, + /* is_tlab_allocated */ true); + __ raw_pop_and_ret(R4, R5); + + __ bind(try_eden); + __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset())); + __ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case); // initializes result and obj_end + __ incr_allocated_bytes(obj_size, tmp2); + __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2, + instanceOopDesc::header_size() * HeapWordSize, -1, + /* is_tlab_allocated */ false); + __ raw_pop_and_ret(R4, R5); + + __ bind(slow_case); + __ raw_pop(R4, R5, LR); + + __ bind(slow_case_no_pop); + } + + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + + // MacroAssembler::StoreStore useless (included in the runtime exit path) + + restore_live_registers_except_R0(sasm); + } + break; + + case counter_overflow_id: + { + OopMap* oop_map = save_live_registers(sasm); + __ ldr(R1, Address(SP, arg1_offset)); + __ ldr(R2, Address(SP, arg2_offset)); + int 
call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), R1, R2); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + + const Register result = R0; + const Register klass = R1; + const Register length = R2; + + if (UseTLAB && FastTLABRefill) { + // We come here when TLAB allocation failed. + // In this case we either refill TLAB or allocate directly from eden. + Label retry_tlab, try_eden, slow_case, slow_case_no_pop; + +#ifdef AARCH64 + __ mov_slow(Rtemp, C1_MacroAssembler::max_array_allocation_length); + __ cmp_32(length, Rtemp); +#else + __ cmp_32(length, C1_MacroAssembler::max_array_allocation_length); +#endif // AARCH64 + __ b(slow_case_no_pop, hs); + + // Free some temporary registers + const Register arr_size = R4; + const Register tmp1 = R5; + const Register tmp2 = LR; + const Register tmp3 = Rtemp; + const Register obj_end = tmp3; + + __ raw_push(R4, R5, LR); + + __ tlab_refill(result, arr_size, tmp1, tmp2, tmp3, try_eden, slow_case); + + __ bind(retry_tlab); + // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size) + __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset())); + __ mov(arr_size, MinObjAlignmentInBytesMask); + __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift)); + +#ifdef AARCH64 + __ lslv_w(tmp3, length, tmp1); + __ add(arr_size, arr_size, tmp3); +#else + __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1)); +#endif // AARCH64 + + __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift)); + __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes); + + // tlab_allocate initializes result and obj_end, and preserves tmp2 which 
contains header_size + __ tlab_allocate(result, obj_end, tmp1, arr_size, slow_case); + + assert_different_registers(result, obj_end, klass, length, tmp1, tmp2); + __ initialize_header(result, klass, length, tmp1); + + __ add(tmp2, result, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift)); + if (!ZeroTLAB) { + __ initialize_body(tmp2, obj_end, tmp1); + } + + __ membar(MacroAssembler::StoreStore, tmp1); + + __ raw_pop_and_ret(R4, R5); + + __ bind(try_eden); + // Get the allocation size: round_up((length << (layout_helper & 0xff)) + header_size) + __ ldr_u32(tmp1, Address(klass, Klass::layout_helper_offset())); + __ mov(arr_size, MinObjAlignmentInBytesMask); + __ and_32(tmp2, tmp1, (unsigned int)(Klass::_lh_header_size_mask << Klass::_lh_header_size_shift)); + +#ifdef AARCH64 + __ lslv_w(tmp3, length, tmp1); + __ add(arr_size, arr_size, tmp3); +#else + __ add(arr_size, arr_size, AsmOperand(length, lsl, tmp1)); +#endif // AARCH64 + + __ add(arr_size, arr_size, AsmOperand(tmp2, lsr, Klass::_lh_header_size_shift)); + __ align_reg(arr_size, arr_size, MinObjAlignmentInBytes); + + // eden_allocate destroys tmp2, so reload header_size after allocation + // eden_allocate initializes result and obj_end + __ eden_allocate(result, obj_end, tmp1, tmp2, arr_size, slow_case); + __ incr_allocated_bytes(arr_size, tmp2); + __ ldrb(tmp2, Address(klass, in_bytes(Klass::layout_helper_offset()) + + Klass::_lh_header_size_shift / BitsPerByte)); + __ initialize_object(result, obj_end, klass, length, tmp1, tmp2, tmp2, -1, /* is_tlab_allocated */ false); + __ raw_pop_and_ret(R4, R5); + + __ bind(slow_case); + __ raw_pop(R4, R5, LR); + __ bind(slow_case_no_pop); + } + + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + oop_maps = 
new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + + // MacroAssembler::StoreStore useless (included in the runtime exit path) + + restore_live_registers_except_R0(sasm); + } + break; + + case new_multi_array_id: + { + __ set_info("new_multi_array", dont_gc_arguments); + + // R0: klass + // R2: rank + // SP: address of 1st dimension + const Register result = R0; + OopMap* map = save_live_registers(sasm); + + __ mov(R1, R0); + __ add(R3, SP, arg1_offset); + int call_offset = __ call_RT(result, noreg, CAST_FROM_FN_PTR(address, new_multi_array), R1, R2, R3); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + + // MacroAssembler::StoreStore useless (included in the runtime exit path) + + restore_live_registers_except_R0(sasm); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // Do not call runtime if JVM_ACC_HAS_FINALIZER flag is not set + __ load_klass(Rtemp, R0); + __ ldr_u32(Rtemp, Address(Rtemp, Klass::access_flags_offset())); + +#ifdef AARCH64 + Label L; + __ tbnz(Rtemp, exact_log2(JVM_ACC_HAS_FINALIZER), L); + __ ret(); + __ bind(L); +#else + __ tst(Rtemp, JVM_ACC_HAS_FINALIZER); + __ bx(LR, eq); +#endif // AARCH64 + + // Call VM + OopMap* map = save_live_registers(sasm); + oop_maps = new OopMapSet(); + int call_offset = __ call_RT(noreg, noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), R0); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + } + break; + + case throw_range_check_failed_id: + { + __ set_info("range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case throw_index_exception_id: + { + __ set_info("index_range_check_failed", dont_gc_arguments); +#ifdef AARCH64 + __ NOT_TESTED(); +#endif + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + 
case throw_div0_exception_id: + { + __ set_info("throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { + __ set_info("throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + __ set_info("handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + __ set_info("handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + generate_unwind_exception(sasm); + } + break; + + case throw_array_store_exception_id: + { + __ set_info("throw_array_store_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case throw_class_cast_exception_id: + { + __ set_info("throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); +#ifdef AARCH64 + __ NOT_TESTED(); +#endif + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // (in) R0 - sub, destroyed, + // (in) R1 - super, not changed + // (out) R0 - result: 1 if check passed, 0 otherwise + __ raw_push(R2, R3, LR); + + // Load an array of secondary_supers + __ ldr(R2, Address(R0, Klass::secondary_supers_offset())); + // Length 
goes to R3 + __ ldr_s32(R3, Address(R2, Array::length_offset_in_bytes())); + __ add(R2, R2, Array::base_offset_in_bytes()); + + Label loop, miss; + __ bind(loop); + __ cbz(R3, miss); + __ ldr(LR, Address(R2, wordSize, post_indexed)); + __ sub(R3, R3, 1); + __ cmp(LR, R1); + __ b(loop, ne); + + // We get here if an equal cache entry is found + __ str(R1, Address(R0, Klass::secondary_super_cache_offset())); + __ mov(R0, 1); + __ raw_pop_and_ret(R2, R3); + + // A cache entry not found - return false + __ bind(miss); + __ mov(R0, 0); + __ raw_pop_and_ret(R2, R3); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + __ set_info("monitorenter", dont_gc_arguments); + const Register obj = R1; + const Register lock = R2; + OopMap* map = save_live_registers(sasm, save_fpu_registers); + __ ldr(obj, Address(SP, arg1_offset)); + __ ldr(lock, Address(SP, arg2_offset)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), obj, lock); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + __ set_info("monitorexit", dont_gc_arguments); + const Register lock = R1; + OopMap* map = save_live_registers(sasm, save_fpu_registers); + __ ldr(lock, Address(SP, arg1_offset)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), lock); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + __ set_info("deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + const Register trap_request = R1; + __ ldr(trap_request, Address(SP, arg1_offset)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), trap_request); + 
oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers_without_return(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg)); + } + break; + + case access_field_patching_id: + { + __ set_info("access_field_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + __ set_info("load_klass_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_appendix_patching_id: + { + __ set_info("load_appendix_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case load_mirror_patching_id: + { + __ set_info("load_mirror_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case predicate_failed_trap_id: + { + __ set_info("predicate_failed_trap", dont_gc_arguments); + + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + restore_live_registers_without_return(sasm); + + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ jump(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type, Rtemp); + } + break; + + default: + { + __ set_info("unimplemented entry", dont_gc_arguments); + STOP("unimplemented entry"); + } + break; + } + return oop_maps; +} + +#undef __ + +#ifdef __SOFTFP__ +const char 
*Runtime1::pd_name_for_address(address entry) { + +#define FUNCTION_CASE(a, f) \ + if ((intptr_t)a == CAST_FROM_FN_PTR(intptr_t, f)) return #f + + FUNCTION_CASE(entry, __aeabi_fadd_glibc); + FUNCTION_CASE(entry, __aeabi_fmul); + FUNCTION_CASE(entry, __aeabi_fsub_glibc); + FUNCTION_CASE(entry, __aeabi_fdiv); + + // __aeabi_XXXX_glibc: Imported code from glibc soft-fp bundle for calculation accuracy improvement. See CR 6757269. + FUNCTION_CASE(entry, __aeabi_dadd_glibc); + FUNCTION_CASE(entry, __aeabi_dmul); + FUNCTION_CASE(entry, __aeabi_dsub_glibc); + FUNCTION_CASE(entry, __aeabi_ddiv); + + FUNCTION_CASE(entry, __aeabi_f2d); + FUNCTION_CASE(entry, __aeabi_d2f); + FUNCTION_CASE(entry, __aeabi_i2f); + FUNCTION_CASE(entry, __aeabi_i2d); + FUNCTION_CASE(entry, __aeabi_f2iz); + + FUNCTION_CASE(entry, SharedRuntime::fcmpl); + FUNCTION_CASE(entry, SharedRuntime::fcmpg); + FUNCTION_CASE(entry, SharedRuntime::dcmpl); + FUNCTION_CASE(entry, SharedRuntime::dcmpg); + + FUNCTION_CASE(entry, SharedRuntime::unordered_fcmplt); + FUNCTION_CASE(entry, SharedRuntime::unordered_dcmplt); + FUNCTION_CASE(entry, SharedRuntime::unordered_fcmple); + FUNCTION_CASE(entry, SharedRuntime::unordered_dcmple); + FUNCTION_CASE(entry, SharedRuntime::unordered_fcmpge); + FUNCTION_CASE(entry, SharedRuntime::unordered_dcmpge); + FUNCTION_CASE(entry, SharedRuntime::unordered_fcmpgt); + FUNCTION_CASE(entry, SharedRuntime::unordered_dcmpgt); + + FUNCTION_CASE(entry, SharedRuntime::fneg); + FUNCTION_CASE(entry, SharedRuntime::dneg); + + FUNCTION_CASE(entry, __aeabi_fcmpeq); + FUNCTION_CASE(entry, __aeabi_fcmplt); + FUNCTION_CASE(entry, __aeabi_fcmple); + FUNCTION_CASE(entry, __aeabi_fcmpge); + FUNCTION_CASE(entry, __aeabi_fcmpgt); + + FUNCTION_CASE(entry, __aeabi_dcmpeq); + FUNCTION_CASE(entry, __aeabi_dcmplt); + FUNCTION_CASE(entry, __aeabi_dcmple); + FUNCTION_CASE(entry, __aeabi_dcmpge); + FUNCTION_CASE(entry, __aeabi_dcmpgt); +#undef FUNCTION_CASE + return ""; +} +#else // __SOFTFP__ +const char 
*Runtime1::pd_name_for_address(address entry) { + return ""; +} +#endif // __SOFTFP__ --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c1_globals_arm.hpp 2016-12-02 11:19:22.009152127 -0500 @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C1_GLOBALS_ARM_HPP +#define CPU_ARM_VM_C1_GLOBALS_ARM_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) +// + +#ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, UseTLAB, true ); +define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(size_t, NewSizeThreadIncrease, 4*K ); +define_pd_global(size_t, InitialCodeCacheSize, 160*K); +define_pd_global(size_t, ReservedCodeCacheSize, 32*M ); +define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(size_t, ProfiledCodeHeapSize, 14*M ); +define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(size_t, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(size_t, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // COMPILER2 +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, false); + + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true); +define_pd_global(bool, CSEArrayLength, true); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_ARM_VM_C1_GLOBALS_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/c2_globals_arm.hpp 2016-12-02 11:19:27.641471534 -0500 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2008, 2015, 
Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_C2_GLOBALS_ARM_HPP +#define CPU_ARM_VM_C2_GLOBALS_ARM_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
+
+define_pd_global(bool, BackgroundCompilation, true);
+define_pd_global(bool, CICompileOSR, true);
+define_pd_global(bool, InlineIntrinsics, false);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps, true);
+define_pd_global(bool, UseOnStackReplacement, true);
+define_pd_global(bool, ProfileInterpreter, true);
+#ifdef AARCH64
+define_pd_global(bool, TieredCompilation, trueInTiered);
+#else
+define_pd_global(bool, TieredCompilation, false);
+#endif
+define_pd_global(intx, CompileThreshold, 10000);
+
+define_pd_global(intx, OnStackReplacePercentage, 140);
+define_pd_global(intx, ConditionalMoveLimit, 4);
+// C2 gets to use all the float/double registers
+#ifdef AARCH64
+define_pd_global(intx, FLOATPRESSURE, 31);
+#else
+define_pd_global(intx, FLOATPRESSURE, 30);
+#endif
+define_pd_global(intx, FreqInlineSize, 175);
+#ifdef AARCH64
+define_pd_global(intx, INTPRESSURE, 27);
+#else
+define_pd_global(intx, INTPRESSURE, 12);
+#endif
+define_pd_global(intx, InteriorEntryAlignment, 16); // = CodeEntryAlignment
+define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+// The default setting 16/16 seems to work best.
+// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
+//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
+define_pd_global(intx, RegisterCostAreaRatio, 16000);
+define_pd_global(bool, UseTLAB, true);
+define_pd_global(bool, ResizeTLAB, true);
+define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
+define_pd_global(intx, LoopPercentProfileLimit, 10);
+define_pd_global(bool, PostLoopMultiversioning, false); // boolean on/off flag, so its pd default is typed bool
+define_pd_global(intx, MinJumpTableSize, 16);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoScheduling, true); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, IdealizeClearArrayNode, true); + +#ifdef _LP64 +// We need to make sure that all generated code is within +// 2 gigs of the libjvm.so runtime routines so we can use +// the faster "call" instruction rather than the expensive +// sequence of instructions to load a 64 bit pointer. +// +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(size_t, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(size_t, ReservedCodeCacheSize, 48*M); +define_pd_global(size_t, NonProfiledCodeHeapSize, 21*M); +define_pd_global(size_t, ProfiledCodeHeapSize, 22*M); +define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(size_t, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t, MaxRAM, 128ULL*G); +#else +// InitialCodeCacheSize derived from specjbb2000 run. 
+define_pd_global(size_t, InitialCodeCacheSize, 1536*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(size_t, ReservedCodeCacheSize, 32*M); +define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M); +define_pd_global(size_t, ProfiledCodeHeapSize, 14*M); +define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(size_t, CodeCacheExpansionSize, 32*K); +// Ergonomics related flags +define_pd_global(uint64_t, MaxRAM, 4ULL*G); +#endif +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed + +// Heap related flags +define_pd_global(size_t, MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_ARM_VM_C2_GLOBALS_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/codeBuffer_arm.hpp 2016-12-02 11:19:34.037834270 -0500 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_CODEBUFFER_ARM_HPP +#define CPU_ARM_VM_CODEBUFFER_ARM_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_ARM_VM_CODEBUFFER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/compiledIC_arm.cpp 2016-12-02 11:19:40.826219237 -0500 @@ -0,0 +1,166 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nativeInst.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- +#if defined(COMPILER2) || INCLUDE_JVMCI +#define __ _masm. +// emit call stub, compiled java to interpreter +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + // Stub is fixed up when the corresponding call is converted from calling + // compiled code to calling interpreted code. + // set (empty), R9 + // b -1 + + if (mark == NULL) { + mark = cbuf.insts_mark(); // get mark within main instrs section + } + + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(to_interp_stub_size()); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark)); + + InlinedMetadata object_literal(NULL); + // single instruction, see NativeMovConstReg::next_instruction_address() in + // CompiledStaticCall::set_to_interpreted() + __ ldr_literal(Rmethod, object_literal); + + __ set_inst_mark(); // Who uses this? + + bool near_range = __ cache_fully_reachable(); + InlinedAddress dest((address)-1); + address branch_site = __ pc(); + if (near_range) { + __ b(branch_site); // special NativeJump -1 destination + } else { + // Can't trash LR, FP, or argument registers + __ indirect_jump(dest, Rtemp); + } + __ bind_literal(object_literal); // includes spec_for_immediate reloc + if (!near_range) { + __ bind_literal(dest); // special NativeJump -1 destination + } + + assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); + + // Update current stubs pointer and restore code_end. 
+ __ end_a_stub(); + return base; +} +#undef __ + +// size of C2 call stub, compiled java to interpreter +int CompiledStaticCall::to_interp_stub_size() { + return 8 * NativeInstruction::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 10; // 4 in emit_to_interp_stub + 1 in Java_Static_Call +} +#endif // COMPILER2 || JVMCI + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + +#ifdef ASSERT + // read the value once + volatile intptr_t data = method_holder->data(); + volatile address destination = jump->jump_destination(); + assert(data == 0 || data == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(destination == (address)-1 || destination == entry, + "b) MT-unsafe modification of inline cache"); +#endif + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/copy_arm.hpp 2016-12-02 11:19:46.310530250 -0500 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_COPY_ARM_HPP +#define CPU_ARM_VM_COPY_ARM_HPP + +#include "utilities/macros.hpp" + +// Inline functions for memory copy and fill. + +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + juint* to = (juint*)tohw; + count *= HeapWordSize / BytesPerInt; + while (count-- > 0) { + *to++ = value; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + memset(to, 0, count); +} + +#endif // CPU_ARM_VM_COPY_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/debug_arm.cpp 2016-12-02 11:19:51.722837178 -0500 @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +void pd_ps(frame f) {} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/depChecker_arm.cpp 2016-12-02 11:19:57.247150459 -0500 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_arm.hpp" + +// Nothing to do --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/depChecker_arm.hpp 2016-12-02 11:20:02.907471453 -0500 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_DEPCHECKER_ARM_HPP +#define CPU_ARM_VM_DEPCHECKER_ARM_HPP + +// Nothing to do + +#endif // CPU_ARM_VM_DEPCHECKER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/disassembler_arm.hpp 2016-12-02 11:20:08.591793808 -0500 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_DISASSEMBLER_ARM_HPP +#define CPU_ARM_VM_DISASSEMBLER_ARM_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return ""; + } + +#endif // CPU_ARM_VM_DISASSEMBLER_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/frame_arm.cpp 2016-12-02 11:20:14.332119339 -0500 @@ -0,0 +1,655 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_arm.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif +#include "prims/methodHandles.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp != NULL && + (sp <= thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size)); + + if (!sp_safe) { + return false; + } + + bool unextended_sp_safe = (unextended_sp != NULL && + (unextended_sp <= thread->stack_base()) && + (unextended_sp >= sp)); + if (!unextended_sp_safe) { + return false; + } + + // We know sp/unextended_sp are safe. Only fp is questionable here. + + bool fp_safe = (fp != NULL && + (fp <= thread->stack_base()) && + fp >= sp); + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + address sender_pc = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? 
+ if ((address)sender_sp >= thread->stack_base()) { + return false; + } + // With our calling conventions, the return_address should + // end up being the word on the stack + sender_pc = (address) *(sender_sp - sender_sp_offset + return_addr_offset); + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. + + intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset + link_offset); + bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + } + + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset + link_offset); + bool saved_fp_safe = ((address)saved_fp <= thread->stack_base()) && (saved_fp >= sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + 
address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw <= thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ((address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
+ + return true; +} + + +void frame::patch_pc(Thread* thread, address pc) { + address* pc_addr = &((address *)sp())[-sender_sp_offset+return_addr_offset]; + if (TracePcPatching) { + tty->print_cr("patch_pc at address" INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "] ", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + assert(is_entry_frame(), "entry frame expected"); + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the 
pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +#ifdef AARCH64 + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_stack_top(intptr_t* stack_top) { + *((intptr_t**)addr_at(interpreter_frame_stack_top_offset)) = stack_top; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_extended_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_extended_sp_offset)) = sp; +} + +#else + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} + +#endif // AARCH64 + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); +#ifdef AARCH64 + assert (jfa->last_Java_pc() != NULL, "pc should be stored"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; +#else + if (jfa->last_Java_pc() != NULL) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +#endif // AARCH64 +} + +//------------------------------------------------------------------------------ +// 
frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // same as on x86 + + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CompiledMethod* sender_cm = (_cb == NULL) ? NULL : _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. 
+ if (sender_cm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_mh_original_pc(sender_cm, _fp)); + _unextended_sp = _fp; + } + else if (sender_cm->is_deopt_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } + else if (sender_cm->is_method_handle_return(_pc)) { + _unextended_sp = _fp; + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // see x86 for comments + map->set_location(FP->as_VMReg(), (address) link_addr); +#ifdef AARCH64 + // also adjust a high part of register + map->set_location(FP->as_VMReg()->next(), (address) link_addr); +#endif // AARCH64 +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif // COMPILER2 + + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + + address sender_pc = (address) *(sender_sp - sender_sp_offset + return_addr_offset); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). 
+ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - sender_sp_offset + link_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is that we don't have to follow argument oops. The sender_for_xxx will + // update it accordingly. + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + + assert(false, "should not be called for a C frame"); + return frame(); +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + + if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcp + + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate ConstantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be misled at this point + + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* res_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit both of + // the possible return value registers are saved. 
+#ifdef AARCH64 + // Return value registers are saved into the frame + if (type == T_FLOAT || type == T_DOUBLE) { + res_addr = addr_at(interpreter_frame_fp_saved_result_offset); + } else { + res_addr = addr_at(interpreter_frame_gp_saved_result_offset); + } +#else + // Return value registers are pushed to the native stack + res_addr = (intptr_t*)sp(); +#ifdef __ABI_HARD__ + // FP result is pushed onto a stack along with integer result registers + if (type == T_FLOAT || type == T_DOUBLE) { + res_addr += 2; + } +#endif // __ABI_HARD__ +#endif // AARCH64 + } else { + res_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + } else { + obj = *(oop*)res_addr; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)res_addr; break; + case T_BYTE : value_result->b = *(jbyte*)res_addr; break; + case T_CHAR : value_result->c = *(jchar*)res_addr; break; + case T_SHORT : value_result->s = *(jshort*)res_addr; break; + case T_INT : value_result->i = *(jint*)res_addr; break; + case T_LONG : value_result->j = *(jlong*)res_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)res_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)res_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); +#ifdef AARCH64 + 
DESCRIBE_FP_OFFSET(interpreter_frame_stack_top); + DESCRIBE_FP_OFFSET(interpreter_frame_extended_sp); +#else + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); +#endif // AARCH64 + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} + +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { +#ifndef AARCH64 + if (is_entry_frame()) { + // Work-around: FP (currently) does not conform to the ABI for entry + // frames (see generate_call_stub). Might be worth fixing as another CR. + // Following code assumes (and asserts) this has not yet been fixed. + assert(frame::entry_frame_call_wrapper_offset == 0, "adjust this code"); + intptr_t* new_fp = fp(); + new_fp += 5; // saved R0,R1,R2,R4,R10 +#ifndef __SOFTFP__ + new_fp += 8*2; // saved D8..D15 +#endif + return new_fp; + } +#endif // !AARCH64 + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/frame_arm.hpp 2016-12-02 11:20:19.960438520 -0500 @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_FRAME_ARM_HPP +#define CPU_ARM_VM_FRAME_ARM_HPP + +#include "runtime/synchronizer.hpp" + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + + // Interpreter frames +#ifdef AARCH64 + interpreter_frame_gp_saved_result_offset = 4, // for native calls only + interpreter_frame_fp_saved_result_offset = 3, // for native calls only +#endif + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_sp_offset = -1, +#ifdef AARCH64 + interpreter_frame_stack_top_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_extended_sp_offset = interpreter_frame_stack_top_offset - 1, + interpreter_frame_method_offset = interpreter_frame_extended_sp_offset - 1, +#else + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, +#endif // AARCH64 + 
interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + + // Entry frames + entry_frame_call_wrapper_offset = AARCH64_ONLY(2) NOT_AARCH64(0) + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } + +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + +#ifndef AARCH64 + frame(intptr_t* sp, intptr_t* fp); +#endif // !AARCH64 + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + // Note: not necessarily the real 'frame pointer' (see real_fp) + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + +#ifdef AARCH64 + // Used by template based interpreter deoptimization + void interpreter_frame_set_stack_top(intptr_t* stack_top); + void interpreter_frame_set_extended_sp(intptr_t* sp); + +#else + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // AARCH64 + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#endif // CPU_ARM_VM_FRAME_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/frame_arm.inline.hpp 2016-12-02 11:20:25.484751799 -0500 @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_FRAME_ARM_INLINE_HPP +#define CPU_ARM_VM_FRAME_ARM_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for ARM frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = 
CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + assert(_cb->as_compiled_method()->insts_contains(_pc), "original PC must be in CompiledMethod"); + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +#ifndef AARCH64 + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + assert(sp != NULL,"null SP ?"); + _pc = (address)(sp[-1]); + // assert(_pc != NULL, "no pc?"); // see comments in x86 + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +#endif // !AARCH64 + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +#ifndef AARCH64 +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} +#endif // !AARCH64 + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +inline oop* frame::interpreter_frame_mirror_addr() const { + return 
(oop*)addr_at(interpreter_frame_mirror_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { +#ifdef AARCH64 + intptr_t* stack_top = (intptr_t*)*addr_at(interpreter_frame_stack_top_offset); + assert(stack_top != NULL, "should be stored before call"); + assert(stack_top <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return stack_top; +#else + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended or shrunk by an adapter. At least + // check that we don't fall behind the legal region. + // For top deoptimized frame last_sp == interpreter_frame_monitor_end. + assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +#endif // AARCH64 +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + + +// Compiled frames + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop*) map->location(R0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop*) 
map->location(R0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; +} + +#endif // CPU_ARM_VM_FRAME_ARM_INLINE_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/globalDefinitions_arm.hpp 2016-12-02 11:20:30.921060089 -0500 @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP +#define CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP + +#ifdef AARCH64 +#define AARCH64_ONLY(code) code +#define AARCH64_ONLY_ARG(arg) , arg +#define NOT_AARCH64(code) +#define NOT_AARCH64_ARG(arg) +#else +#define AARCH64_ONLY(code) +#define AARCH64_ONLY_ARG(arg) +#define NOT_AARCH64(code) code +#define NOT_AARCH64_ARG(arg) , arg +#endif + +const int StackAlignmentInBytes = AARCH64_ONLY(16) NOT_AARCH64(8); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are extended to 64 bits. 
+const bool CCallingConventionRequiresIntsAsLongs = false; + +#ifdef __SOFTFP__ +const bool HaveVFP = false; +#else +const bool HaveVFP = true; +#endif + +#if defined(__ARM_PCS_VFP) || defined(AARCH64) +#define __ABI_HARD__ +#endif + +#if defined(__ARM_ARCH_7A__) || defined(AARCH64) +#define SUPPORTS_NATIVE_CX8 +#endif + +#define STUBROUTINES_MD_HPP "stubRoutines_arm.hpp" +#define INTERP_MASM_MD_HPP "interp_masm_arm.hpp" +#define TEMPLATETABLE_MD_HPP "templateTable_arm.hpp" +#ifdef AARCH64 +#define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_64.hpp" +#define AD_MD_HPP "adfiles/ad_arm_64.hpp" +#else +#define ADGLOBALS_MD_HPP "adfiles/adGlobals_arm_32.hpp" +#define AD_MD_HPP "adfiles/ad_arm_32.hpp" +#endif +#define C1_LIRGENERATOR_MD_HPP "c1_LIRGenerator_arm.hpp" + +#ifdef TARGET_COMPILER_gcc +#ifdef ARM32 +#undef BREAKPOINT +#define BREAKPOINT __asm__ volatile ("bkpt") +#endif +#endif + +#endif // CPU_ARM_VM_GLOBALDEFINITIONS_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/globals_arm.hpp 2016-12-02 11:20:36.929400818 -0500 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_GLOBALS_ARM_HPP +#define CPU_ARM_VM_GLOBALS_ARM_HPP + +// +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) +// + +define_pd_global(bool, ShareVtableStubs, true); + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed + +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); + +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (5 DEBUG_ONLY(+1)) +#define DEFAULT_STACK_RESERVED_PAGES (0) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(intx, InlineFrequencyCount, 50); +#if defined(COMPILER1) || defined(COMPILER2) +define_pd_global(intx, InlineSmallCode, 1500); +#endif + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, UseMembar, true); + +define_pd_global(bool, 
PreserveFramePointer, false); + +// GC Ergo Flags +define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 0); + +// No performance work done here yet. +define_pd_global(bool, CompactStrings, false); + +define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); + +#define ARCH_FLAGS(develop, \ + product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + develop(bool, VerifyInterpreterStackTop, false, \ + "Verify interpreter stack top at every stack expansion (AArch64 only)") \ + \ + develop(bool, ZapHighNonSignificantBits, false, \ + "Zap high non-significant bits of values (AArch64 only)") \ + \ + +#endif // CPU_ARM_VM_GLOBALS_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/icBuffer_arm.cpp 2016-12-02 11:20:42.669726348 -0500 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "assembler_arm.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_arm.hpp" +#include "oops/oop.inline.hpp" + +#define __ masm-> + +int InlineCacheBuffer::ic_stub_code_size() { + return (AARCH64_ONLY(8) NOT_AARCH64(4)) * Assembler::InstructionSize; +} + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + + InlinedAddress oop_literal((address) cached_value); + __ ldr_literal(Ricklass, oop_literal); + // FIXME: OK to remove reloc here? + __ patchable_jump(entry_point, relocInfo::runtime_call_type, Rtemp); + __ bind_literal(oop_literal); + __ flush(); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + address jump_address; + jump_address = code_begin + NativeInstruction::instruction_size; + NativeJump* jump = nativeJump_at(jump_address); + return jump->jump_destination(); +} + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + return (void*)move->data(); +} + +#undef __ --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/icache_arm.cpp 2016-12-02 11:20:47.742013994 -0500 @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "assembler_arm.inline.hpp" +#include "runtime/icache.hpp" + +#define __ _masm-> + +#ifdef AARCH64 + +static int icache_flush(address addr, int lines, int magic) { + // TODO-AARCH64 Figure out actual cache line size (mrs Xt, CTR_EL0) + + address p = addr; + for (int i = 0; i < lines; i++, p += ICache::line_size) { + __asm__ volatile( + " dc cvau, %[p]" + : + : [p] "r" (p) + : "memory"); + } + + __asm__ volatile( + " dsb ish" + : : : "memory"); + + p = addr; + for (int i = 0; i < lines; i++, p += ICache::line_size) { + __asm__ volatile( + " ic ivau, %[p]" + : + : [p] "r" (p) + : "memory"); + } + + __asm__ volatile( + " dsb ish\n\t" + " isb\n\t" + : : : "memory"); + + return magic; +} + +#else + +static int icache_flush(address addr, int lines, int magic) { + __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); + return magic; +} + +#endif // AARCH64 + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + address 
start = (address)icache_flush; + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first + // call is invoked on the generated icache flush stub code range. + ICache::invalidate_range(start, 0); + + { + // dummy code mark to make the shared code happy + // (fields that would need to be modified to emulate the correct + // mark are not accessible) + StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); + __ ret(); + } +} + +#undef __ --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/icache_arm.hpp 2016-12-02 11:20:53.362332717 -0500 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_ICACHE_ARM_HPP +#define CPU_ARM_VM_ICACHE_ARM_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. 
+ +class ICache : public AbstractICache { + public: + enum { + stub_size = 32, // Size of the icache flush stub in bytes + line_size = BytesPerWord, // conservative + log2_line_size = LogBytesPerWord // log2(line_size) + }; +}; + +#endif // CPU_ARM_VM_ICACHE_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/interp_masm_arm.cpp 2016-12-02 11:20:58.614630571 -0500 @@ -0,0 +1,2272 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSet.inline.hpp" +#include "gc/shared/cardTableModRefBS.inline.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interp_masm_arm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" + +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +//-------------------------------------------------------------------- +// Implementation of InterpreterMacroAssembler + + + + +InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) { +} + +void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { +#if defined(ASSERT) && !defined(AARCH64) + // Ensure that last_sp is not filled. + { Label L; + ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + cbz(Rtemp, L); + stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL"); + bind(L); + } +#endif // ASSERT && !AARCH64 + + // Rbcp must be saved/restored since it may change due to GC. 
+ save_bcp(); + +#ifdef AARCH64 + check_no_cached_stack_top(Rtemp); + save_stack_top(); + check_extended_sp(Rtemp); + cut_sp_before_call(); +#endif // AARCH64 + + // super call + MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions); + +#ifdef AARCH64 + // Restore SP to extended SP + restore_sp_after_call(Rtemp); + check_stack_top(); + clear_cached_stack_top(); +#endif // AARCH64 + + // Restore interpreter specific registers. + restore_bcp(); + restore_method(); +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + b(entry); +} + +void InterpreterMacroAssembler::check_and_handle_popframe() { + if (can_pop_frame()) { + Label L; + const Register popframe_cond = R2_tmp; + + // Initiate popframe handling only if it is not already being processed. If the flag + // has the popframe_processing bit set, it means that this code is called *during* popframe + // handling - we don't want to reenter. + + ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset())); + tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L); + tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L); + + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + + // Call indirectly to avoid generation ordering problem. + jump(R0); + + bind(L); + } +} + + +// Blows R2, Rtemp. Sets TOS cached value. 
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + const Register thread_state = R2_tmp; + + ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); + + const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset()); +#ifndef AARCH64 + const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset() + + in_ByteSize(wordSize)); +#endif // !AARCH64 + + Register zero = zero_register(Rtemp); + + switch (state) { + case atos: ldr(R0_tos, oop_addr); + str(zero, oop_addr); + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); + break; + +#ifdef AARCH64 + case ltos: ldr(R0_tos, val_addr); break; +#else + case ltos: ldr(R1_tos_hi, val_addr_hi); // fall through +#endif // AARCH64 + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ldr_s32(R0_tos, val_addr); break; +#ifdef __SOFTFP__ + case dtos: ldr(R1_tos_hi, val_addr_hi); // fall through + case ftos: ldr(R0_tos, val_addr); break; +#else + case ftos: ldr_float (S0_tos, val_addr); break; + case dtos: ldr_double(D0_tos, val_addr); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + str(zero, val_addr); +#ifndef AARCH64 + str(zero, val_addr_hi); +#endif // !AARCH64 + + mov(Rtemp, (int) ilgl); + str_32(Rtemp, tos_addr); +} + + +// Blows R2, Rtemp. 
+void InterpreterMacroAssembler::check_and_handle_earlyret() { + if (can_force_early_return()) { + Label L; + const Register thread_state = R2_tmp; + + ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); + cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + + ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset())); + cmp(Rtemp, JvmtiThreadState::earlyret_pending); + b(L, ne); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. + + ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0); + + jump(R0); + + bind(L); + } +} + + +// Sets reg. Blows Rtemp. 
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + assert(reg != Rtemp, "should be different registers"); + + ldrb(Rtemp, Address(Rbcp, bcp_offset)); + ldrb(reg, Address(Rbcp, bcp_offset+1)); + orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte)); +} + +void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) { + assert_different_registers(index, tmp_reg); + if (index_size == sizeof(u2)) { + // load bytes of index separately to avoid unaligned access + ldrb(index, Address(Rbcp, bcp_offset+1)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + } else if (index_size == sizeof(u4)) { + // TODO-AARCH64: consider using unaligned access here + ldrb(index, Address(Rbcp, bcp_offset+3)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset+2)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset+1)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + mvn_32(index, index); // convert to plain index + } else if (index_size == sizeof(u1)) { + ldrb(index, Address(Rbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Sets cache, index. 
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert_different_registers(cache, index); + + get_index_at_bcp(index, bcp_offset, cache, index_size); + + // load constant pool cache pointer + ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize)); + + // convert from field index to ConstantPoolCacheEntry index + assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); + // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called + logical_shift_left(index, index, 2); +} + +// Sets cache, index, bytecode. +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // caution index and bytecode can be the same + add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord)); +#ifdef AARCH64 + add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + ldarb(bytecode, bytecode); +#else + ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()))); + TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true); +#endif // AARCH64 +} + +// Sets cache. Blows reg_tmp. 
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert_different_registers(cache, reg_tmp); + + get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size); + + // load constant pool cache pointer + ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize)); + + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); + add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord)); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + get_constant_pool(result); + + Register cache = result; + // load pointer for resolved_references[] objArray + ldr(cache, Address(result, ConstantPool::resolved_references_offset_in_bytes())); + // JNIHandles::resolve(result) + ldr(cache, Address(cache, 0)); + // Add in the index + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop)); + load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Generate a subtype check: branch to not_subtype if sub_klass is +// not a subtype of super_klass. +// Profiling code for the subtype check failure (profile_typecheck_failed) +// should be explicitly generated by the caller in the not_subtype case. +// Blows Rtemp, tmp1, tmp2. 
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Register Rsuper_klass, + Label ¬_subtype, + Register tmp1, + Register tmp2) { + + assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp); + Label ok_is_subtype, loop, update_cache; + + const Register super_check_offset = tmp1; + const Register cached_super = tmp2; + + // Profile the not-null value's klass. + profile_typecheck(tmp1, Rsub_klass); + + // Load the super-klass's check offset into + ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset())); + + // Check for self + cmp(Rsub_klass, Rsuper_klass); + + // Load from the sub-klass's super-class display list, or a 1-word cache of + // the secondary superclass list, or a failing value with a sentinel offset + // if the super-klass is an interface or exceptionally deep in the Java + // hierarchy and we have to scan the secondary superclass list the hard way. + // See if we get an immediate positive hit + ldr(cached_super, Address(Rsub_klass, super_check_offset)); + + cond_cmp(Rsuper_klass, cached_super, ne); + b(ok_is_subtype, eq); + + // Check for immediate negative hit + cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset())); + b(not_subtype, ne); + + // Now do a linear scan of the secondary super-klass chain. + const Register supers_arr = tmp1; + const Register supers_cnt = tmp2; + const Register cur_super = Rtemp; + + // Load objArrayOop of secondary supers. + ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset())); + + ldr_u32(supers_cnt, Address(supers_arr, Array::length_offset_in_bytes())); // Load the array length +#ifdef AARCH64 + cbz(supers_cnt, not_subtype); + add(supers_arr, supers_arr, Array::base_offset_in_bytes()); +#else + cmp(supers_cnt, 0); + + // Skip to the start of array elements and prefetch the first super-klass. 
+ ldr(cur_super, Address(supers_arr, Array::base_offset_in_bytes(), pre_indexed), ne); + b(not_subtype, eq); +#endif // AARCH64 + + bind(loop); + +#ifdef AARCH64 + ldr(cur_super, Address(supers_arr, wordSize, post_indexed)); +#endif // AARCH64 + + cmp(cur_super, Rsuper_klass); + b(update_cache, eq); + + subs(supers_cnt, supers_cnt, 1); + +#ifndef AARCH64 + ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne); +#endif // !AARCH64 + + b(loop, ne); + + b(not_subtype); + + bind(update_cache); + // Must be equal but missed in cache. Update cache. + str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset())); + + bind(ok_is_subtype); +} + + +// The 1st part of the store check. +// Sets card_table_base register. +void InterpreterMacroAssembler::store_check_part1(Register card_table_base) { + // Check barrier set type (should be card table) and element size + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableForRS || + bs->kind() == BarrierSet::CardTableExtension, + "Wrong barrier set kind"); + + CardTableModRefBS* ct = barrier_set_cast(bs); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code"); + + // Load card table base address. + + /* Performance note. + + There is an alternative way of loading card table base address + from thread descriptor, which may look more efficient: + + ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset())); + + However, performance measurements of micro benchmarks and specJVM98 + showed that loading of card table base from thread descriptor is + 7-18% slower compared to loading of literal embedded into the code. + Possible cause is a cache miss (card table base address resides in a + rarely accessed area of thread descriptor). 
+ */ + // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64 + mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference); +} + +// The 2nd part of the store check. +void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) { + assert_different_registers(obj, card_table_base, tmp); + + assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations."); +#ifdef AARCH64 + add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift)); + Address card_table_addr(card_table_base); +#else + Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift); +#endif + + if (UseCondCardMark) { + if (UseConcMarkSweepGC) { + membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg); + } + Label already_dirty; + + ldrb(tmp, card_table_addr); + cbz(tmp, already_dirty); + + set_card(card_table_base, card_table_addr, tmp); + bind(already_dirty); + + } else { + if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { + membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg); + } + set_card(card_table_base, card_table_addr, tmp); + } +} + +void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) { +#ifdef AARCH64 + strb(ZR, card_table_addr); +#else + CardTableModRefBS* ct = barrier_set_cast(Universe::heap()->barrier_set()); + if ((((uintptr_t)ct->byte_map_base & 0xff) == 0)) { + // Card table is aligned so the lowest byte of the table address base is zero. + // This works only if the code is not saved for later use, possibly + // in a context where the base would no longer be aligned. 
+ strb(card_table_base, card_table_addr); + } else { + mov(tmp, 0); + strb(tmp, card_table_addr); + } +#endif // AARCH64 +} + +////////////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +// G1 pre-barrier. +// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +// If store_addr != noreg, then previous value is loaded from [store_addr]; +// in such case store_addr and new_val registers are preserved; +// otherwise pre_val register is preserved. +void InterpreterMacroAssembler::g1_write_barrier_pre(Register store_addr, + Register new_val, + Register pre_val, + Register tmp1, + Register tmp2) { + Label done; + Label runtime; + + if (store_addr != noreg) { + assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg); + } else { + assert (new_val == noreg, "should be"); + assert_different_registers(pre_val, tmp1, tmp2, noreg); + } + + Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_active())); + Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_index())); + Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_buf())); + + // Is marking active? + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); + ldrb(tmp1, in_progress); + cbz(tmp1, done); + + // Do we need to load the previous value? + if (store_addr != noreg) { + load_heap_oop(pre_val, Address(store_addr, 0)); + } + + // Is the previous value null? + cbz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) 
+ + ldr(tmp1, index); // tmp1 := *index_adr + ldr(tmp2, buffer); + + subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize + b(runtime, lt); // If negative, goto runtime + + str(tmp1, index); // *index_adr := tmp1 + + // Record the previous value + str(pre_val, Address(tmp2, tmp1)); + b(done); + + bind(runtime); + + // save the live input values +#ifdef AARCH64 + if (store_addr != noreg) { + raw_push(store_addr, new_val); + } else { + raw_push(pre_val, ZR); + } +#else + if (store_addr != noreg) { + // avoid raw_push to support any ordering of store_addr and new_val + push(RegisterSet(store_addr) | RegisterSet(new_val)); + } else { + push(pre_val); + } +#endif // AARCH64 + + if (pre_val != R0) { + mov(R0, pre_val); + } + mov(R1, Rthread); + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); + +#ifdef AARCH64 + if (store_addr != noreg) { + raw_pop(store_addr, new_val); + } else { + raw_pop(pre_val, ZR); + } +#else + if (store_addr != noreg) { + pop(RegisterSet(store_addr) | RegisterSet(new_val)); + } else { + pop(pre_val); + } +#endif // AARCH64 + + bind(done); +} + +// G1 post-barrier. +// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +void InterpreterMacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + Register tmp3) { + + Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_index())); + Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? 
+ + eor(tmp1, store_addr, new_val); +#ifdef AARCH64 + logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); + cbz(tmp1, done); +#else + movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); + b(done, eq); +#endif + + // crosses regions, storing NULL? + + cbz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp1; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference); + add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift)); + + ldrb(tmp2, Address(card_addr)); + cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + b(done, eq); + + membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); + + assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code"); + ldrb(tmp2, Address(card_addr)); + cbz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
+ + strb(zero_register(tmp2), Address(card_addr)); + + ldr(tmp2, queue_index); + ldr(tmp3, buffer); + + subs(tmp2, tmp2, wordSize); + b(runtime, lt); // go to runtime if now negative + + str(tmp2, queue_index); + + str(card_addr, Address(tmp3, tmp2)); + b(done); + + bind(runtime); + + if (card_addr != R0) { + mov(R0, card_addr); + } + mov(R1, Rthread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr(r, Address(Rstack_top, wordSize, post_indexed)); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr_s32(r, Address(Rstack_top, wordSize, post_indexed)); + zap_high_non_significant_bits(r); +} + +#ifdef AARCH64 +void InterpreterMacroAssembler::pop_l(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr(r, Address(Rstack_top, 2*wordSize, post_indexed)); +} +#else +void InterpreterMacroAssembler::pop_l(Register lo, Register hi) { + assert_different_registers(lo, hi); + assert(lo < hi, "lo must be < hi"); + pop(RegisterSet(lo) | RegisterSet(hi)); +} +#endif // AARCH64 + +void InterpreterMacroAssembler::pop_f(FloatRegister fd) { +#ifdef AARCH64 + ldr_s(fd, Address(Rstack_top, wordSize, post_indexed)); +#else + fpops(fd); +#endif // AARCH64 +} + +void InterpreterMacroAssembler::pop_d(FloatRegister fd) { +#ifdef AARCH64 + ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed)); +#else + fpopd(fd); +#endif // AARCH64 +} + + +// Transition vtos -> state. Blows R0, R1. Sets TOS cached value. 
+void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(R0_tos); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: pop_i(R0_tos); break; +#ifdef AARCH64 + case ltos: pop_l(R0_tos); break; +#else + case ltos: pop_l(R0_tos_lo, R1_tos_hi); break; +#endif // AARCH64 +#ifdef __SOFTFP__ + case ftos: pop_i(R0_tos); break; + case dtos: pop_l(R0_tos_lo, R1_tos_hi); break; +#else + case ftos: pop_f(S0_tos); break; + case dtos: pop_d(D0_tos); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + str(r, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} + +void InterpreterMacroAssembler::push_i(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + str_32(r, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} + +#ifdef AARCH64 +void InterpreterMacroAssembler::push_l(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} +#else +void InterpreterMacroAssembler::push_l(Register lo, Register hi) { + assert_different_registers(lo, hi); + assert(lo < hi, "lo must be < hi"); + push(RegisterSet(lo) | RegisterSet(hi)); +} +#endif // AARCH64 + +void InterpreterMacroAssembler::push_f() { +#ifdef AARCH64 + str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +#else + fpushs(S0_tos); +#endif // AARCH64 +} + +void InterpreterMacroAssembler::push_d() { +#ifdef AARCH64 + str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed)); + check_stack_top_on_expansion(); +#else + fpushd(D0_tos); +#endif // AARCH64 +} + +// 
Transition state -> vtos. Blows Rtemp. +void InterpreterMacroAssembler::push(TosState state) { + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); + switch (state) { + case atos: push_ptr(R0_tos); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: push_i(R0_tos); break; +#ifdef AARCH64 + case ltos: push_l(R0_tos); break; +#else + case ltos: push_l(R0_tos_lo, R1_tos_hi); break; +#endif // AARCH64 +#ifdef __SOFTFP__ + case ftos: push_i(R0_tos); break; + case dtos: push_l(R0_tos_lo, R1_tos_hi); break; +#else + case ftos: push_f(); break; + case dtos: push_d(); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + +#ifndef AARCH64 + +// Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value. +void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) { +#if (!defined __SOFTFP__ && !defined __ABI_HARD__) + // According to interpreter calling conventions, result is returned in R0/R1, + // but templates expect ftos in S0, and dtos in D0. + if (state == ftos) { + fmsr(S0_tos, R0); + } else if (state == dtos) { + fmdrr(D0_tos, R0, R1); + } +#endif // !__SOFTFP__ && !__ABI_HARD__ +} + +// Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions). +void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) { +#if (!defined __SOFTFP__ && !defined __ABI_HARD__) + // According to interpreter calling conventions, result is returned in R0/R1, + // so ftos (S0) and dtos (D0) are moved to R0/R1. 
+ if (state == ftos) { + fmrs(R0, S0_tos); + } else if (state == dtos) { + fmrrd(R0, R1, D0_tos); + } +#endif // !__SOFTFP__ && !__ABI_HARD__ +} + +#endif // !AARCH64 + + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); +} + + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { +#ifdef AARCH64 + check_no_cached_stack_top(Rtemp); + save_stack_top(); + cut_sp_before_call(); + mov(Rparams, Rstack_top); +#endif // AARCH64 + + // set sender sp + mov(Rsender_sp, SP); + +#ifndef AARCH64 + // record last_sp + str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); +#endif // !AARCH64 +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method) { + assert_different_registers(method, Rtemp); + + prepare_to_jump_from_interpreted(); + + if (can_post_interpreter_events()) { + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. 
    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
#ifdef AARCH64
    {
      Label not_interp_only_mode;

      cbz(Rtemp, not_interp_only_mode);
      indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);

      bind(not_interp_only_mode);
    }
#else
    cmp(Rtemp, 0);
    ldr(PC, Address(method, Method::interpreter_entry_offset()), ne);
#endif // AARCH64
  }

  indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp);
}


// Reloads RdispatchTable with the address of the vtos dispatch table.
void InterpreterMacroAssembler::restore_dispatch() {
  mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos));
}


// The following two routines provide a hook so that an implementation
// can schedule the dispatch in two parts.
// On ARM the prolog emits nothing; all the work is done in the epilog.
void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
  // Nothing ARM-specific to be done here.
}

// Second half of the two-part dispatch: simply forwards to dispatch_next().
void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
  dispatch_next(state, step);
}

// Emits the common dispatch tail: optional sanity checks followed by an
// indirect jump through a dispatch table indexed by R3_bytecode.
//
//   state      - TOS state; selects which table is used and, for the
//                sub-word integer states, triggers zap_high_non_significant_bits.
//   table_mode - DispatchDefault jumps through RdispatchTable; the only other
//                accepted value is DispatchNormal, which jumps through
//                Interpreter::normal_table(state).
//   verifyoop  - when true, interp_verify_oop() is called on R0_tos.
//
// Uses Rtemp as scratch.
void InterpreterMacroAssembler::dispatch_base(TosState state,
                                              DispatchTableMode table_mode,
                                              bool verifyoop) {
  if (VerifyActivationFrameSize) {
    // Stop with "broken stack frame" if FP - SP is smaller than the minimal frame size.
    Label L;
#ifdef AARCH64
    // SP is first copied to Rtemp before the subtraction on AArch64.
    mov(Rtemp, SP);
    sub(Rtemp, FP, Rtemp);
#else
    sub(Rtemp, FP, SP);
#endif // AARCH64
    int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
    cmp(Rtemp, min_frame_size);
    b(L, ge);
    stop("broken stack frame");
    bind(L);
  }

  if (verifyoop) {
    interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
  }

  if((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) {
    zap_high_non_significant_bits(R0_tos);
  }

#ifdef ASSERT
  // Debug builds check that RdispatchTable still holds the vtos dispatch table address.
  Label L;
  mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos));
  cmp(Rtemp, RdispatchTable);
  b(L, eq);
  stop("invalid RdispatchTable");
  bind(L);
#endif

  if (table_mode == DispatchDefault) {
    if (state == vtos) {
      // RdispatchTable already points at the vtos table: index it directly.
      indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
    } else {
#ifdef AARCH64
      // Adjust the index by the distance (in entries) between the vtos table and the table for 'state'.
      sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
                          Interpreter::distance_from_dispatch_table(state)));
      indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
#else
      // on 32-bit ARM this method is faster than the one above.
      // Here the table base is adjusted instead (same distance, scaled by wordSize).
      sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
                          Interpreter::distance_from_dispatch_table(state)) * wordSize);
      indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
#endif
    }
  } else {
    assert(table_mode == DispatchNormal, "invalid dispatch table mode");
    address table = (address) Interpreter::normal_table(state);
    mov_slow(Rtemp, table);
    indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
  }

  nop(); // to avoid filling CPU pipeline with invalid instructions
  nop();
}

// Dispatches on the bytecode already in R3_bytecode through the default table.
void InterpreterMacroAssembler::dispatch_only(TosState state) {
  dispatch_base(state, DispatchDefault);
}


// Same as dispatch_only(), but through the normal table for 'state'.
void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
  dispatch_base(state, DispatchNormal);
}

// Same as dispatch_only_normal(), but without verifying the oop in R0_tos.
void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
  dispatch_base(state, DispatchNormal, false);
}

// Loads the bytecode at Rbcp + step into R3_bytecode (updating Rbcp) and
// dispatches on it through the default table.
void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
  // load next bytecode and advance Rbcp
  ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed));
  dispatch_base(state, DispatchDefault);
}

// Narrows the integer in 'result' according to the result type stored in the
// current method's ConstMethod:
//   T_INT      - left unchanged
//   T_BOOLEAN  - masked with 1
//   T_BYTE     - sign-extended from 8 bits
//   T_CHAR     - zero-extended from 16 bits
//   otherwise  - sign-extended from 16 bits (T_SHORT is expected; the explicit
//                check is commented out below)
// Clobbers R2.
void InterpreterMacroAssembler::narrow(Register result) {
  // mask integer result to narrower return type.
  const Register Rtmp = R2;

  // get method type
  ldr(Rtmp, Address(Rmethod, Method::const_offset()));
  ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset()));

  Label notBool, notByte, notChar, done;
  cmp(Rtmp, T_INT);
  b(done, eq);

  cmp(Rtmp, T_BOOLEAN);
  b(notBool, ne);
  and_32(result, result, 1);
  b(done);

  bind(notBool);
  cmp(Rtmp, T_BYTE);
  b(notByte, ne);
  sign_extend(result, result, 8);
  b(done);

  bind(notByte);
  cmp(Rtmp, T_CHAR);
  b(notChar, ne);
  zero_extend(result, result, 16);
  b(done);

  bind(notChar);
  // cmp(Rtmp, T_SHORT);
  // b(done, ne);
  sign_extend(result, result, 16);

  // Nothing to do
  bind(done);
}

// remove activation
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
//
//    If there are locked Java monitors
//       If throw_monitor_exception
//          throws IllegalMonitorStateException
//       Else if install_monitor_exception
//          installs IllegalMonitorStateException
//       Else
//          no error processing
//
// Parameters:
//   state                     - TOS state, saved/restored with push(state)/pop(state)
//                               around the unlock calls
//   ret_addr                  - receives a copy of LR at the end unless it is LR itself
//   throw_monitor_exception   - see above
//   install_monitor_exception - see above
//   notify_jvmdi              - selects NotifyJVMTI vs. SkipNotifyJVMTI for notify_method_exit()
void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr,
                                                  bool throw_monitor_exception,
                                                  bool install_monitor_exception,
                                                  bool notify_jvmdi) {
  Label unlock, unlocked, no_unlock;

  // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result.

  const Address do_not_unlock_if_synchronized(Rthread,
                         JavaThread::do_not_unlock_if_synchronized_offset());

  const Register Rflag = R2;
  const Register Raccess_flags = R3;

  restore_method();

  // Read the flag before it is reset below.
  ldrb(Rflag, do_not_unlock_if_synchronized);

  // get method access flags
  ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset()));

  strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag

  // check if method is synchronized

  tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked);

  // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
  cbnz(Rflag, no_unlock);

  // unlock monitor
  push(state);                                   // save result

  // BasicObjectLock will be first in list, since this is a synchronized method. However, need
  // to check that the object has not been unlocked by an explicit monitorexit bytecode.

  const Register Rmonitor = R1;                  // fixed in unlock_object()
  const Register Robj = R2;

  // address of first monitor
  sub(Rmonitor, FP, - frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock));

  ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes()));
  cbnz(Robj, unlock);

  pop(state);

  if (throw_monitor_exception) {
    // Entry already unlocked, need to throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Monitor already unlocked during a stack unroll.
    // If requested, install an illegal_monitor_state_exception.
    // Continue with stack unrolling.
    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }
    b(unlocked);
  }


  // Exception case for the check that all monitors are unlocked.
  // This code is emitted out of line: it is only reached by the branch from
  // the monitor scan loop further down.
  const Register Rcur = R2;
  Label restart_check_monitors_unlocked, exception_monitor_is_still_locked;

  bind(exception_monitor_is_still_locked);
  // Monitor entry is still locked, need to throw exception.
  // Rcur: monitor entry.

  if (throw_monitor_exception) {
    // Throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Stack unrolling. Unlock object and install illegal_monitor_exception
    // Unlock does not block, so don't have to worry about the frame

    push(state);
    mov(R1, Rcur);
    unlock_object(R1);

    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }

    pop(state);
    // Rescan the monitor block from the top.
    b(restart_check_monitors_unlocked);
  }

  bind(unlock);
  unlock_object(Rmonitor);
  pop(state);

  // Check for block-structured locking (i.e., that all locked objects have been unlocked)
  bind(unlocked);

  // Check that all monitors are unlocked
  {
    Label loop;

    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
    const Register Rbottom = R3;
    const Register Rcur_obj = Rtemp;

    bind(restart_check_monitors_unlocked);

    ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
                                 // points to current entry, starting with top-most entry
    sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
                                 // points to word before bottom of monitor block

    cmp(Rcur, Rbottom);          // check if there are no monitors
#ifndef AARCH64
    // 32-bit ARM: conditional load, executed only when the compare above was 'ne'.
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                 // prefetch monitor's object
#endif // !AARCH64
    b(no_unlock, eq);

    bind(loop);
#ifdef AARCH64
    // AArch64 loads the object at the top of each iteration instead.
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
#endif // AARCH64
    // check if current entry is used
    cbnz(Rcur_obj, exception_monitor_is_still_locked);

    add(Rcur, Rcur, entry_size);      // otherwise advance to next entry
    cmp(Rcur, Rbottom);               // check if bottom reached
#ifndef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                      // prefetch monitor's object
#endif // !AARCH64
    b(loop, ne);                      // if not at bottom then check this entry
  }

  bind(no_unlock);

  // jvmti support
  if (notify_jvmdi) {
    notify_method_exit(state, NotifyJVMTI);     // preserve TOSCA
  } else {
    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
  }

  // remove activation
  // Restores FP and LR from the frame and sets SP to the saved sender SP.
#ifdef AARCH64
  ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  ldp(FP, LR, Address(FP));
  mov(SP, Rtemp);
#else
  // Keep the old FP in Rtemp: FP is overwritten by the ldmia below.
  mov(Rtemp, FP);
  ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
  ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
#endif

  if (ret_addr != LR) {
    mov(ret_addr, LR);
  }
}


// At certain points in the method invocation the monitor of
// synchronized methods hasn't been entered yet.
// To correctly handle exceptions at these points, we set the thread local
// variable _do_not_unlock_if_synchronized to true. The remove_activation will
// check this flag.
//
//   flag - value to store into the thread-local byte (1 for true, zero for false)
//   tmp  - scratch register used to materialize the stored value
void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) {
  const Address do_not_unlock_if_synchronized(Rthread,
                         JavaThread::do_not_unlock_if_synchronized_offset());
  if (flag) {
    mov(tmp, 1);
    strb(tmp, do_not_unlock_if_synchronized);
  } else {
    strb(zero_register(tmp), do_not_unlock_if_synchronized);
  }
}

// Lock object
//
// Argument: R1 : Points to BasicObjectLock to be used for locking.
// Must be initialized with object to lock.
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
// Emits the monitor-enter sequence for the BasicObjectLock pointed to by Rlock
// (which must be R1). With UseHeavyMonitors the runtime is always called;
// otherwise an inline fast path is tried first:
//   1. optional biased-locking entry,
//   2. CAS of the unlocked mark word against the lock record,
//   3. recursive stack-lock check,
// and InterpreterRuntime::monitorenter is called only on the slow path.
// Uses R0, R2 (object), R3 (mark word) and Rtemp as scratch.
void InterpreterMacroAssembler::lock_object(Register Rlock) {
  assert(Rlock == R1, "the second argument");

  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);
  } else {
    Label done;

    const Register Robj = R2;
    const Register Rmark = R3;
    assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);

    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
    const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();

    Label already_locked, slow_case;

    // Load object pointer
    ldr(Robj, Address(Rlock, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
    }

#ifdef AARCH64
    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Robj);

    // Test if object is already locked
    assert(markOopDesc::unlocked_value == 1, "adjust this code");
    tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);

#else // AARCH64

    // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
    // That would be acceptable as either CAS or slow case path is taken in that case.
    // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as
    // loads are satisfied from a store queue if performed on the same processor).

    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes()));

    // Test if object is already locked
    tst(Rmark, markOopDesc::unlocked_value);
    b(already_locked, eq);

#endif // !AARCH64
    // Save old object->mark() into BasicLock's displaced header
    str(Rmark, Address(Rlock, mark_offset));

    cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case);

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr());
    }
#endif //!PRODUCT

    b(done);

    // If we got here that means the object is locked by either calling thread or another thread.
    bind(already_locked);
    // Handling of locked objects: recursive locks and slow case.

    // Fast check for recursive lock.
    //
    // Can apply the optimization only if this is a stack lock
    // allocated in this thread. For efficiency, we can focus on
    // recently allocated stack locks (instead of reading the stack
    // base and checking whether 'mark' points inside the current
    // thread stack):
    //  1) (mark & 3) == 0
    //  2) SP <= mark < SP + os::pagesize()
    //
    // Warning: SP + os::pagesize can overflow the stack base. We must
    // neither apply the optimization for an inflated lock allocated
    // just above the thread stack (this is why condition 1 matters)
    // nor apply the optimization if the stack lock is inside the stack
    // of another thread. The latter is avoided even in case of overflow
    // because we have guard pages at the end of all stacks. Hence, if
    // we go over the stack base and hit the stack of another thread,
    // this should not be in a writeable area that could contain a
    // stack lock allocated by that thread. As a consequence, a stack
    // lock less than page size away from SP is guaranteed to be
    // owned by the current thread.
    //
    // Note: assuming SP is aligned, we can check the low bits of
    // (mark-SP) instead of the low bits of mark. In that case,
    // assuming page size is a power of 2, we can merge the two
    // conditions into a single test:
    // => ((mark - SP) & (3 - os::pagesize())) == 0

#ifdef AARCH64
    // Use the single check since the immediate is OK for AARCH64
    sub(R0, Rmark, Rstack_top);
    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
    Assembler::LogicalImmediate imm(mask, false);
    ands(R0, R0, imm);

    // For recursive case store 0 into lock record.
    // It is harmless to store it unconditionally as lock record contains some garbage
    // value in its _displaced_header field by this moment.
    str(ZR, Address(Rlock, mark_offset));

#else // AARCH64
    // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
    // Check independently the low bits and the distance to SP.
    // -1- test low 2 bits
    movs(R0, AsmOperand(Rmark, lsl, 30));
    // -2- test (mark - SP) if the low two bits are 0
    sub(R0, Rmark, SP, eq);
    movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq);
    // If still 'eq' then recursive locking OK: store 0 into lock record
    // (R0 is zero at this point whenever the condition is still 'eq').
    str(R0, Address(Rlock, mark_offset), eq);

#endif // AARCH64

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr());
    }
#endif // !PRODUCT

    // 'eq' here means the recursive check succeeded; otherwise fall into the slow case.
    b(done, eq);

    bind(slow_case);

    // Call the runtime routine for slow case
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);

    bind(done);
  }
}


// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
// Argument: R1: Points to BasicObjectLock structure for lock
// Throw an IllegalMonitorException if object is not locked by current thread
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+void InterpreterMacroAssembler::unlock_object(Register Rlock) { + assert(Rlock == R1, "the second argument"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); + } else { + Label done, slow_case; + + const Register Robj = R2; + const Register Rmark = R3; + const Register Rresult = R0; + assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + const Register Rzero = zero_register(Rtemp); + + // Load oop into Robj + ldr(Robj, Address(Rlock, obj_offset)); + + // Free entry + str(Rzero, Address(Rlock, obj_offset)); + + if (UseBiasedLocking) { + biased_locking_exit(Robj, Rmark, done); + } + + // Load the old header from BasicLock structure + ldr(Rmark, Address(Rlock, mark_offset)); + + // Test for recursion (zero mark in BasicLock) + cbz(Rmark, done); + + bool allow_fallthrough_on_failure = true; + + cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure); + + b(done, eq); + + bind(slow_case); + + // Call the runtime routine for slow case. + str(Robj, Address(Rlock, obj_offset)); // restore obj + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); + + bind(done); + } +} + + +// Test ImethodDataPtr. If it is null, continue at the specified label +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); + cbz(mdp, zero_continue); +} + + +// Set the method data pointer for the current bcp. +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. 
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // Test MDO to avoid the call if it is NULL. + ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); + cbz(Rtemp, set_mdp); + + mov(R0, Rmethod); + mov(R1, Rbcp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1); + // R0/W0: mdi + + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); + add(Rtemp, Rtemp, in_bytes(MethodData::data_offset())); + add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0); + + bind(set_mdp); + str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + save_caller_save_registers(); + + const Register Rmdp = R2; + test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. + + ldrh(R3, Address(Rmdp, DataLayout::bci_offset())); + ldr(Rtemp, Address(Rmethod, Method::const_offset())); + add(R3, R3, Rtemp); + add(R3, R3, in_bytes(ConstMethod::codes_offset())); + cmp(R3, Rbcp); + b(verify_continue, eq); + + mov(R0, Rmethod); + mov(R1, Rbcp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp); + + bind(verify_continue); + restore_caller_save_registers(); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, value); + str(value, Address(mdp_in, offset)); +} + + +// Increments mdp data. Sets bumped_count register to adjusted counter. 
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int offset, + Register bumped_count, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + // Counter address + Address data(mdp_in, offset); + assert_different_registers(mdp_in, bumped_count); + + increment_mdp_data_at(data, bumped_count, decrement); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) { + assert_different_registers(mdp_in, Rtemp); + assert(ProfileInterpreter, "must be profiling interpreter"); + assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range"); + + // Set the flag + ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); + orr(Rtemp, Rtemp, (unsigned)flag_byte_constant); + strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); +} + + +// Increments mdp data. Sets bumped_count register to adjusted counter. +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + Register bumped_count, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + ldr(bumped_count, data); + if (decrement) { + // Decrement the register. Set condition codes. + subs(bumped_count, bumped_count, DataLayout::counter_increment); + // Avoid overflow. +#ifdef AARCH64 + assert(DataLayout::counter_increment == 1, "required for cinc"); + cinc(bumped_count, bumped_count, pl); +#else + add(bumped_count, bumped_count, DataLayout::counter_increment, pl); +#endif // AARCH64 + } else { + // Increment the register. Set condition codes. + adds(bumped_count, bumped_count, DataLayout::counter_increment); + // Avoid overflow. 
+#ifdef AARCH64 + assert(DataLayout::counter_increment == 1, "required for cinv"); + cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff +#else + sub(bumped_count, bumped_count, DataLayout::counter_increment, mi); +#endif // AARCH64 + } + str(bumped_count, data); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, test_value_out, value); + + ldr(test_value_out, Address(mdp_in, offset)); + cmp(test_value_out, value); + + b(not_equal_continue, ne); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, reg_temp); + + ldr(reg_temp, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, reg_temp); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, reg_offset, reg_tmp); + + ldr(reg_tmp, Address(mdp_in, reg_offset)); + add(mdp_in, mdp_in, reg_tmp); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(mdp_in, mdp_in, constant); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). 
// Calls InterpreterRuntime::update_mdp_for_ret in the VM, passing return_bci in R1.
// return_bci must not be any of R0-R3 or Rtemp.
void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);

  mov(R1, return_bci);
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1);
}


// Profiles a taken branch: bumps JumpData's taken counter and advances mdp by
// the stored displacement. Emits nothing unless ProfileInterpreter is set.
// Sets mdp, bumped_count registers, blows Rtemp.
void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) {
  assert_different_registers(mdp, bumped_count);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    // Otherwise, assign to mdp
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch. Increment the taken count.
    increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp);

    bind (profile_continue);
  }
}


// Profiles a branch that is not taken: bumps BranchData's not-taken counter
// and steps mdp over the BranchData record.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are not taking the branch. Increment the not taken count.
    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp);

    // The method data pointer needs to be updated to correspond to the next bytecode
    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));

    bind (profile_continue);
  }
}


// Profiles a call: bumps CounterData's count and steps mdp over the
// CounterData record.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_call(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));

    bind (profile_continue);
  }
}


// Like profile_call(), but steps mdp over a VirtualCallData-sized record.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_final_call(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));

    bind (profile_continue);
  }
}


// Profiles a virtual call: records the receiver klass in the VirtualCallData
// rows. When receiver_can_be_null is set and the receiver is null, only the
// total count is bumped instead.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) {
  assert_different_registers(mdp, receiver, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    Label skip_receiver_profile;
    if (receiver_can_be_null) {
      Label not_null;
      cbnz(receiver, not_null);
      // We are making a call. Increment the count for null receiver.
      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
      b(skip_receiver_profile);
      bind(not_null);
    }

    // Record the receiver type.
    record_klass_in_profile(receiver, mdp, Rtemp, true);
    bind(skip_receiver_profile);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
    bind(profile_continue);
  }
}


// Recursive code generator behind record_klass_in_profile().
// Emits the row tests for rows start_row..row_limit()-1 of a VirtualCallData
// record. Every emitted path that finishes the update branches to 'done',
// except the final fall-through of the outermost call (start_row == 0).
//
//   receiver        - klass to look up / record
//   mdp             - pointer to the profile record
//   reg_tmp         - scratch register
//   start_row       - first row handled by this invocation
//   done            - label bound by the caller after all emitted code
//   is_virtual_call - when true, a receiver that matches no row and finds no
//                     empty row bumps the total counter instead
void InterpreterMacroAssembler::record_klass_in_profile_helper(
                                        Register receiver, Register mdp,
                                        Register reg_tmp,
                                        int start_row, Label& done, bool is_virtual_call) {
  if (TypeProfileWidth == 0)
    return;

  assert_different_registers(receiver, mdp, reg_tmp);

  int last_row = VirtualCallData::row_limit() - 1;
  assert(start_row <= last_row, "must be work left to do");
  // Test this row for both the receiver and for null.
  // Take any of three different outcomes:
  //   1. found receiver => increment count and goto done
  //   2. found null => keep looking for case 1, maybe allocate this cell
  //   3. found something else => keep looking for cases 1 and 2
  // Case 3 is handled by a recursive call.
  for (int row = start_row; row <= last_row; row++) {
    Label next_test;

    // See if the receiver is receiver[n].
    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));

    test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test);

    // The receiver is receiver[n].  Increment count[n].
    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
    increment_mdp_data_at(mdp, count_offset, reg_tmp);
    b(done);

    bind(next_test);
    // reg_tmp now contains the receiver from the CallData.

    if (row == start_row) {
      Label found_null;
      // Failed the equality check on receiver[n]...  Test for null.
      if (start_row == last_row) {
        // The only thing left to do is handle the null case.
        if (is_virtual_call) {
          cbz(reg_tmp, found_null);
          // Receiver did not match any saved receiver and there is no empty row for it.
          // Increment total counter to indicate polymorphic case.
          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp);
          b(done);
          bind(found_null);
        } else {
          cbnz(reg_tmp, done);
        }
        break;
      }
      // Since null is rare, make it be the branch-taken case.
      cbz(reg_tmp, found_null);

      // Put all the "Case 3" tests here.
      record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call);

      // Found a null.  Keep searching for a matching receiver,
      // but remember that this is an empty (unused) slot.
      bind(found_null);
    }
  }

  // In the fall-through case, we found no matching receiver, but we
  // observed the receiver[start_row] is NULL.

  // Fill in the receiver field and increment the count.
  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
  set_mdp_data_at(mdp, recvr_offset, receiver);
  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
  mov(reg_tmp, DataLayout::counter_increment);
  set_mdp_data_at(mdp, count_offset, reg_tmp);
  // The outermost invocation (start_row == 0) falls through to 'done' instead of branching.
  if (start_row > 0) {
    b(done);
  }
}

// Records 'receiver' in the receiver rows of the profile record at 'mdp'.
// Entry point for record_klass_in_profile_helper(): starts at row 0 and binds
// the shared 'done' label after the generated code. Clobbers reg_tmp.
void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
                                                        Register mdp,
                                                        Register reg_tmp,
                                                        bool is_virtual_call) {
  assert(ProfileInterpreter, "must be profiling");
  assert_different_registers(receiver, mdp, reg_tmp);

  Label done;

  record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call);

  bind (done);
}

// Profiles a 'ret': bumps the total count, then looks for return_bci among
// the RetData rows; on a hit bumps that row's count and follows its
// displacement, otherwise falls back to update_mdp_for_ret() in the VM.
// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
  assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);

  if (ProfileInterpreter) {
    Label profile_continue;
    uint row;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the total ret count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    for (row = 0; row < RetData::row_limit(); row++) {
      Label next_test;

      // See if return_bci is equal to bci[n]:
      test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci,
                       Rtemp, next_test);

      // return_bci is equal to bci[n].  Increment the count.
      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp);

      // The method data pointer needs to be updated to reflect the new target.
      update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp);
      b(profile_continue);
      bind(next_test);
    }

    // No row matched: let the VM update the mdp.
    update_mdp_for_ret(return_bci);

    bind(profile_continue);
  }
}


// Records that a null was seen: sets the null_seen flag in the record and
// steps mdp past it.
// Sets mdp, blows Rtemp (scratch register of set_mdp_flag_at).
void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());

    // The method data pointer needs to be updated.
    // With TypeProfileCasts the delta is the VirtualCallData size instead.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind (profile_continue);
  }
}


// Records a failed type check by decrementing the count of the record that
// mdp has already been advanced past. Only active with TypeProfileCasts.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter && TypeProfileCasts) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    int count_offset = in_bytes(CounterData::count_offset());
    // Back up the address, since we have already bumped the mdp.
    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());

    // *Decrement* the counter.  We expect to see zero or small negatives.
    increment_mdp_data_at(mdp, count_offset, Rtemp, true);

    bind (profile_continue);
  }
}


// Profiles a type check: with TypeProfileCasts the klass is recorded in the
// receiver rows; in either case mdp is stepped past the record.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass)
{
  assert_different_registers(mdp, klass, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

      // Record the object type.
      record_klass_in_profile(klass, mdp, Rtemp, false);
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}


// Profiles the default target of a switch: bumps the default count and
// advances mdp by the default displacement.
// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the default case count
    increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp);

    // The method data pointer needs to be updated.
    update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp);

    bind(profile_continue);
  }
}


// Profiles case number 'index' of a switch: bumps that case's count and
// advances mdp by that case's displacement.
// Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) {
  assert_different_registers(mdp, reg_tmp1, reg_tmp2);
  assert_different_registers(mdp, reg_tmp1, index);

  if (ProfileInterpreter) {
    Label profile_continue;

    const int count_offset = in_bytes(MultiBranchData::case_array_offset()) +
                              in_bytes(MultiBranchData::relative_count_offset());

    const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) +
                              in_bytes(MultiBranchData::relative_displacement_offset());

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Build the base (index * per_case_size_in_bytes())
    // exact_log2 is used for the shift amount, so the per-case size is expected to be a power of two.
    logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));

    // Update the case count
    add(reg_tmp1, reg_tmp1, count_offset);
    increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2);

    // The method data pointer needs to be updated.
    // reg_tmp1 currently addresses the count cell; rebase it onto the displacement cell.
    add(reg_tmp1, reg_tmp1, displacement_offset - count_offset);
    update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2);

    bind (profile_continue);
  }
}


// Reverses the byte order of the 32-bit value in r, in place.
// rtmp1 and rtmp2 are used only on 32-bit ARM when VM_Version::supports_rev()
// is false.
void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
#ifdef AARCH64
  rev_w(r, r);
#else
  if (VM_Version::supports_rev()) {
    rev(r, r);
  } else {
    // Fallback sequence built from eor/mvn/andr with rotated operands.
    eor(rtmp1, r, AsmOperand(r, ror, 16));
    mvn(rtmp2, 0x0000ff00);
    andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
    eor(r, rtmp1, AsmOperand(r, ror, 8));
  }
#endif // AARCH64
}


// Increments the 32-bit counter at address_of_counter + offset.
//
//   tmp1           - scratch, receives the base part of the counter address
//   tmp2           - scratch, receives the counter value
//   avoid_overflow - when true, the incremented value is stored back only if
//                    it is not negative, so the counter stops at its maximum
//
// The absolute address is split into a base, materialized with mov_slow, and
// a low-bits offset (14 bits on AArch64, 12 bits on 32-bit ARM) that is used
// as the immediate offset of the load and store.
void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) {
  const intx addr = (intx) (address_of_counter + offset);

  assert ((addr & 0x3) == 0, "address of counter should be aligned");
  const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));

  const address base = (address) (addr & ~offset_mask);
  const int offs = (int) (addr & offset_mask);

  const Register addr_base = tmp1;
  const Register val = tmp2;

  mov_slow(addr_base, base);
  ldr_s32(val, Address(addr_base, offs));

  if (avoid_overflow) {
    adds_32(val, val, 1);
#ifdef AARCH64
    // Skip the store when the incremented value went negative.
    Label L;
    b(L, mi);
    str_32(val, Address(addr_base, offs));
    bind(L);
#else
    // Conditional store: executed only when the result is non-negative.
    str(val, Address(addr_base, offs), pl);
#endif // AARCH64
  } else {
    add_32(val, val, 1);
    str_32(val, Address(addr_base, offs));
  }
}

void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) {
  if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); }
+} + +// Inline assembly for: +// +// if (thread is in interp_only_mode) { +// InterpreterRuntime::post_method_entry(); +// } +// if (DTraceMethodProbes) { +// SharedRuntime::dtrace_method_entry(method, receiver); +// } +// if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { +// SharedRuntime::rc_trace_method_entry(method, receiver); +// } + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (can_post_interpreter_events()) { + Label L; + + ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); + cbz(Rtemp, L); + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry)); + + bind(L); + } + + // Note: Disable DTrace runtime check for now to eliminate overhead on each method entry + if (DTraceMethodProbes) { + Label Lcontinue; + + ldrb_global(Rtemp, (address)&DTraceMethodProbes); + cbz(Rtemp, Lcontinue); + + mov(R0, Rthread); + mov(R1, Rmethod); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1); + + bind(Lcontinue); + } + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + mov(R0, Rthread); + mov(R1, Rmethod); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + R0, R1); + } +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode, + bool native, Register result_lo, Register result_hi, FloatRegister result_fp) { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. 
+ if (mode == NotifyJVMTI && can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); + cbz(Rtemp, L); + + if (native) { + // For c++ and template interpreter push both result registers on the + // stack in native, we don't know the state. + // On AArch64 result registers are stored into the frame at known locations. + // See frame::interpreter_frame_result for code that gets the result values from here. + assert(result_lo != noreg, "result registers should be defined"); + +#ifdef AARCH64 + assert(result_hi == noreg, "result_hi is not used on AArch64"); + assert(result_fp != fnoreg, "FP result register must be defined"); + + str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); + str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); +#else + assert(result_hi != noreg, "result registers should be defined"); + +#ifdef __ABI_HARD__ + assert(result_fp != fnoreg, "FP result register must be defined"); + sub(SP, SP, 2 * wordSize); + fstd(result_fp, Address(SP)); +#endif // __ABI_HARD__ + + push(RegisterSet(result_lo) | RegisterSet(result_hi)); +#endif // AARCH64 + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + +#ifdef AARCH64 + ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); + ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); +#else + pop(RegisterSet(result_lo) | RegisterSet(result_hi)); +#ifdef __ABI_HARD__ + fldd(result_fp, Address(SP)); + add(SP, SP, 2 * wordSize); +#endif // __ABI_HARD__ +#endif // AARCH64 + + } else { + // For the template interpreter, the value on tos is the 
size of the + // state. (c++ interpreter calls jvmti somewhere else). + push(state); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + pop(state); + } + + bind(L); + } + + // Note: Disable DTrace runtime check for now to eliminate overhead on each method exit + if (DTraceMethodProbes) { + Label Lcontinue; + + ldrb_global(Rtemp, (address)&DTraceMethodProbes); + cbz(Rtemp, Lcontinue); + + push(state); + + mov(R0, Rthread); + mov(R1, Rmethod); + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1); + + pop(state); + + bind(Lcontinue); + } +} + + +#ifndef PRODUCT + +void InterpreterMacroAssembler::trace_state(const char* msg) { + int push_size = save_caller_save_registers(); + + Label Lcontinue; + InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n"); + InlinedString Lmsg(msg); + InlinedAddress Lprintf((address)printf); + + ldr_literal(R0, Lmsg0); + ldr_literal(R1, Lmsg); + mov(R2, FP); + add(R3, SP, push_size); // original SP (without saved registers) + ldr_literal(Rtemp, Lprintf); + call(Rtemp); + + b(Lcontinue); + + bind_literal(Lmsg0); + bind_literal(Lmsg); + bind_literal(Lprintf); + + + bind(Lcontinue); + + restore_caller_save_registers(); +} + +#endif + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+                                                        int increment, Address mask_addr,
+                                                        Register scratch, Register scratch2,
+                                                        AsmCondition cond, Label* where) {
+  // caution: scratch2 and base address of counter_addr can be the same
+  assert_different_registers(scratch, scratch2);
+  ldr_u32(scratch, counter_addr);            // scratch = *counter_addr
+  add(scratch, scratch, increment);          // scratch += increment
+  str_32(scratch, counter_addr);             // write the bumped counter back before scratch2 is touched
+
+#ifdef AARCH64
+  ldr_u32(scratch2, mask_addr);
+  ands_w(ZR, scratch, scratch2);             // flag-setting AND; destination is ZR, so scratch still holds the counter
+#else
+  ldr(scratch2, mask_addr);
+  andrs(scratch, scratch, scratch2);         // flag-setting AND; here scratch is overwritten with (counter & mask)
+#endif // AARCH64
+  b(*where, cond);                           // taken when cond holds for (counter & mask)
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+                                                    Register Rcounters,
+                                                    Label& skip) {
+  const Address method_counters(method, Method::method_counters_offset());
+  Label has_counters;
+
+  ldr(Rcounters, method_counters);           // Rcounters = counters pointer stored in the method
+  cbnz(Rcounters, has_counters);             // non-null: nothing to build, skip the runtime call
+
+#ifdef AARCH64
+  const Register tmp = Rcounters;            // free to use as a temp: Rcounters is reloaded after the call
+  const int saved_regs_size = 20*wordSize;   // 10 stp/ldp pairs below
+
+  // Note: call_VM will cut SP according to Rstack_top value before call, and restore SP to
+  // extended_sp value from frame after the call.
+  // So make sure there is enough stack space to save registers and adjust Rstack_top accordingly.
+  {
+    Label enough_stack_space;
+    check_extended_sp(tmp);
+    sub(Rstack_top, Rstack_top, saved_regs_size);  // carve the save area out below Rstack_top
+    cmp(SP, Rstack_top);
+    b(enough_stack_space, ls);                     // SP is already at or below the new Rstack_top
+
+    align_reg(tmp, Rstack_top, StackAlignmentInBytes);
+    mov(SP, tmp);                                  // otherwise set SP to the aligned Rstack_top ...
+    str(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); // ... and record it as the frame's extended SP
+
+    bind(enough_stack_space);
+    check_stack_top();
+
+    int offset = 0;
+    stp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
+    stp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize;
+    assert (offset == saved_regs_size, "should be");
+  }
+#else
+  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14)); // 32-bit ARM: registers are saved with a plain push
+#endif // AARCH64
+
+  mov(R1, method);                           // argument for build_method_counters
+  call_VM(noreg, CAST_FROM_FN_PTR(address,
+          InterpreterRuntime::build_method_counters), R1);
+
+#ifdef AARCH64
+  {
+    int offset = 0;                          // restore in the same order and layout as the stp sequence above
+    ldp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
+    ldp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize;
+    assert (offset == saved_regs_size, "should be");
+
+    add(Rstack_top, Rstack_top, saved_regs_size);  // release the save area
+  }
+#else
+  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));  // mirrors the push before the call
+#endif // AARCH64
+
+  ldr(Rcounters, method_counters);           // reload after the runtime call
+  cbz(Rcounters, skip); // No MethodCounters created, OutOfMemory
+
+  bind(has_counters);
+}
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/interp_masm_arm.hpp 2016-12-02 11:21:04.674974248 -0500
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_INTERP_MASM_ARM_HPP
+#define CPU_ARM_VM_INTERP_MASM_ARM_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
+#include "prims/jvmtiExport.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+
+class InterpreterMacroAssembler: public MacroAssembler {
+
+ public:
+
+  // allow JvmtiExport checks to be extended
+  bool can_force_early_return()       { return JvmtiExport::can_force_early_return(); }
+  bool can_post_interpreter_events()  { return JvmtiExport::can_post_interpreter_events(); }
+  bool can_pop_frame()                { return JvmtiExport::can_pop_frame(); }
+  bool can_post_breakpoint()          { return JvmtiExport::can_post_breakpoint(); }
+  bool can_post_field_access()        { return JvmtiExport::can_post_field_access(); }
+  bool can_post_field_modification()  { return JvmtiExport::can_post_field_modification(); }
+  // flags controlled by JVMTI settings
+  bool rewrite_frequent_pairs()       { return RewriteFrequentPairs; }
+
+ protected:
+
+  // Template interpreter specific version of call_VM_helper
+  virtual void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+  virtual void check_and_handle_popframe();
+  virtual void check_and_handle_earlyret();
+
+  // base routine for all dispatches
+  typedef enum { DispatchDefault, DispatchNormal } DispatchTableMode;
+  void dispatch_base(TosState state, DispatchTableMode table_mode, bool verifyoop = true);
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* code);
+
+  // Interpreter-specific registers
+#if defined(AARCH64) && defined(ASSERT)
+
+#define check_stack_top() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__))
+#define check_stack_top_on_expansion() _check_stack_top("invalid Rstack_top at " __FILE__ ":" XSTR(__LINE__), VerifyInterpreterStackTop)
+#define check_extended_sp(tmp) _check_extended_sp(tmp, "SP does not match extended SP in frame at " __FILE__ ":" XSTR(__LINE__))
+#define check_no_cached_stack_top(tmp) _check_no_cached_stack_top(tmp, "stack_top is already cached in frame at " __FILE__ ":" XSTR(__LINE__))
+
+  void _check_stack_top(const char* msg, bool enabled = true) {  // stops with msg unless SP <= Rstack_top (unsigned)
+    if (enabled) {
+      Label L;
+      cmp(SP, Rstack_top);
+      b(L, ls);
+      stop(msg);
+      bind(L);
+    }
+  }
+
+  void _check_extended_sp(Register tmp, const char* msg) {  // stops with msg unless SP equals the frame's extended SP slot; blows tmp
+    Label L;
+    ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));
+    cmp(SP, tmp);
+    b(L, eq);
+    stop(msg);
+    bind(L);
+  }
+
+  void _check_no_cached_stack_top(Register tmp, const char* msg) {  // stops with msg unless the frame's stack_top slot is zero; blows tmp
+    Label L;
+    ldr(tmp, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
+    cbz(tmp, L);
+    stop(msg);
+    bind(L);
+  }
+
+#else
+
+  inline void check_stack_top() {}
+  inline void check_stack_top_on_expansion() {}
+  inline void check_extended_sp(Register tmp) {}
+  inline void check_no_cached_stack_top(Register tmp) {}
+
+#endif // AARCH64 && ASSERT
+
+  void save_bcp()       { str(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
+  void restore_bcp()    { ldr(Rbcp, Address(FP, frame::interpreter_frame_bcp_offset * wordSize)); }
+  void restore_locals() { ldr(Rlocals, Address(FP, frame::interpreter_frame_locals_offset * wordSize)); }
+  void restore_method() { ldr(Rmethod, Address(FP, frame::interpreter_frame_method_offset * wordSize)); }
+  void restore_dispatch();
+
+#ifdef AARCH64
+  void save_stack_top() { check_stack_top(); str(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
+  void clear_cached_stack_top() { str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); }
+  void restore_stack_top() { ldr(Rstack_top, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize)); clear_cached_stack_top(); check_stack_top(); }
+  void cut_sp_before_call() { align_reg(SP, Rstack_top, StackAlignmentInBytes); }
+  void restore_sp_after_call(Register tmp) { ldr(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); mov(SP, tmp); }
+#endif
+
+  // Helpers for runtime call arguments/results
+  void get_const(Register reg) { ldr(reg, Address(Rmethod, Method::const_offset())); }
+  void get_constant_pool(Register reg) { get_const(reg); ldr(reg, Address(reg, ConstMethod::constants_offset())); }
+  void get_constant_pool_cache(Register reg) { get_constant_pool(reg); ldr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); }
+  void get_cpool_and_tags(Register cpool, Register tags) { get_constant_pool(cpool); ldr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); }
+
+  // Sets reg. Blows Rtemp.
+  void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+
+  // Sets index. Blows reg_tmp.
+  void get_index_at_bcp(Register index, int bcp_offset, Register reg_tmp, size_t index_size = sizeof(u2));
+  // Sets cache, index.
+  void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+  // Sets cache. Blows reg_tmp.
+  void get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size = sizeof(u2));
+
+  // Load object from cpool->resolved_references(*bcp+1)
+  void load_resolved_reference_at_index(Register result, Register tmp);
+
+  void store_check_part1(Register card_table_base); // Sets card_table_base register.
+  void store_check_part2(Register obj, Register card_table_base, Register tmp);
+
+  void set_card(Register card_table_base, Address card_table_addr, Register tmp);
+
+#if INCLUDE_ALL_GCS
+  // G1 pre-barrier.
+  // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+  // If store_addr != noreg, then previous value is loaded from [store_addr];
+  // in such case store_addr and new_val registers are preserved;
+  // otherwise pre_val register is preserved.
+  void g1_write_barrier_pre(Register store_addr,
+                            Register new_val,
+                            Register pre_val,
+                            Register tmp1,
+                            Register tmp2);
+
+  // G1 post-barrier.
+  // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+                             Register tmp1,
+                             Register tmp2,
+                             Register tmp3);
+#endif // INCLUDE_ALL_GCS
+
+  void pop_ptr(Register r);
+  void pop_i(Register r = R0_tos);
+#ifdef AARCH64
+  void pop_l(Register r = R0_tos);
+#else
+  void pop_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
+#endif
+  void pop_f(FloatRegister fd);
+  void pop_d(FloatRegister fd);
+
+  void push_ptr(Register r);
+  void push_i(Register r = R0_tos);
+#ifdef AARCH64
+  void push_l(Register r = R0_tos);
+#else
+  void push_l(Register lo = R0_tos_lo, Register hi = R1_tos_hi);
+#endif
+  void push_f();
+  void push_d();
+
+  // Transition vtos -> state. Blows R0, R1. Sets TOS cached value.
+  void pop(TosState state);
+  // Transition state -> vtos. Blows Rtemp.
+  void push(TosState state);
+
+#ifndef AARCH64
+  // The following methods are overridden to allow overloaded calls to
+  //   MacroAssembler::push/pop(Register)
+  //   MacroAssembler::push/pop(RegisterSet)
+  //   InterpreterMacroAssembler::push/pop(TosState)
+  void push(Register rd, AsmCondition cond = al) { MacroAssembler::push(rd, cond); }
+  void pop(Register rd, AsmCondition cond = al) { MacroAssembler::pop(rd, cond); }
+
+  void push(RegisterSet reg_set, AsmCondition cond = al) { MacroAssembler::push(reg_set, cond); }
+  void pop(RegisterSet reg_set, AsmCondition cond = al) { MacroAssembler::pop(reg_set, cond); }
+
+  // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
+  void convert_retval_to_tos(TosState state);
+  // Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions).
+  void convert_tos_to_retval(TosState state);
+#endif
+
+  // JVMTI ForceEarlyReturn support
+  void load_earlyret_value(TosState state);
+
+  void jump_to_entry(address entry);
+
+  // Blows Rtemp.
+  void empty_expression_stack() {
+    ldr(Rstack_top, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+    check_stack_top();
+#ifdef AARCH64
+    clear_cached_stack_top();
+#else
+    // NULL last_sp until next java call
+    str(zero_register(Rtemp), Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#endif // AARCH64
+  }
+
+  // Helpers for swap and dup
+  void load_ptr(int n, Register val);
+  void store_ptr(int n, Register val);
+
+  // Generate a subtype check: branch to not_subtype if sub_klass is
+  // not a subtype of super_klass.
+  // Profiling code for the subtype check failure (profile_typecheck_failed)
+  // should be explicitly generated by the caller in the not_subtype case.
+  // Blows Rtemp, tmp1, tmp2.
+  void gen_subtype_check(Register Rsub_klass, Register Rsuper_klass,
+                         Label &not_subtype, Register tmp1, Register tmp2);
+
+  // Dispatching
+  void dispatch_prolog(TosState state, int step = 0);
+  void dispatch_epilog(TosState state, int step = 0);
+  void dispatch_only(TosState state);                   // dispatch by R3_bytecode
+  void dispatch_only_normal(TosState state);            // dispatch normal table by R3_bytecode
+  void dispatch_only_noverify(TosState state);
+  void dispatch_next(TosState state, int step = 0);     // load R3_bytecode from [Rbcp + step] and dispatch by R3_bytecode
+
+  // jump to an invoked target
+  void prepare_to_jump_from_interpreted();
+  void jump_from_interpreted(Register method);
+
+  void narrow(Register result);
+
+  // Returning from interpreted functions
+  //
+  // Removes the current activation (incl. unlocking of monitors)
+  // and sets up the return address. This code is also used for
+  // exception unwinding. In that case, we do not want to throw
+  // IllegalMonitorStateExceptions, since that might get us into an
+  // infinite rethrow exception loop.
+  // Additionally this code is used for popFrame and earlyReturn.
+  // In popFrame case we want to skip throwing an exception,
+  // installing an exception, and notifying jvmdi.
+  // In earlyReturn case we only want to skip throwing an exception
+  // and installing an exception.
+  void remove_activation(TosState state, Register ret_addr,
+                         bool throw_monitor_exception = true,
+                         bool install_monitor_exception = true,
+                         bool notify_jvmdi = true);
+
+  // At certain points in the method invocation the monitor of
+  // synchronized methods hasn't been entered yet.
+  // To correctly handle exceptions at these points, we set the thread local
+  // variable _do_not_unlock_if_synchronized to true. The remove_activation will
+  // check this flag.
+  void set_do_not_unlock_if_synchronized(bool flag, Register tmp);
+
+  // Debugging
+  void interp_verify_oop(Register reg, TosState state, const char* file, int line); // only if +VerifyOops && state == atos
+
+  void verify_FPU(int stack_depth, TosState state = ftos) {
+    // No VFP state verification is required for ARM
+  }
+
+  // Object locking
+  void lock_object (Register lock_reg);
+  void unlock_object(Register lock_reg);
+
+  // Interpreter profiling operations
+  void set_method_data_pointer_for_bcp(); // Blows R0-R3/R0-R18, Rtemp, LR
+  void test_method_data_pointer(Register mdp, Label& zero_continue);
+  void verify_method_data_pointer();
+
+  void set_mdp_data_at(Register mdp_in, int offset, Register value);
+
+  // Increments mdp data. Sets bumped_count register to adjusted counter.
+  void increment_mdp_data_at(Address data, Register bumped_count, bool decrement = false);
+  // Increments mdp data. Sets bumped_count register to adjusted counter.
+  void increment_mdp_data_at(Register mdp_in, int offset, Register bumped_count, bool decrement = false);
+  void increment_mask_and_jump(Address counter_addr,                // Jumps to *where if ((*counter_addr += increment) & mask) satisfies cond.
+                               int increment, Address mask_addr,
+                               Register scratch, Register scratch2, // Both blown; they must be different registers.
+                               AsmCondition cond, Label* where);
+  void set_mdp_flag_at(Register mdp_in, int flag_constant);
+
+  void test_mdp_data_at(Register mdp_in, int offset, Register value,
+                        Register test_value_out,
+                        Label& not_equal_continue);
+
+  void record_klass_in_profile(Register receiver, Register mdp,
+                               Register reg_tmp, bool is_virtual_call);
+  void record_klass_in_profile_helper(Register receiver, Register mdp,
+                                      Register reg_tmp,
+                                      int start_row, Label& done, bool is_virtual_call);
+
+  void update_mdp_by_offset(Register mdp_in, int offset_of_offset, Register reg_tmp);
+  void update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp);
+  void update_mdp_by_constant(Register mdp_in, int constant);
+  void update_mdp_for_ret(Register return_bci);                // Blows R0-R3/R0-R18, Rtemp, LR
+
+  void profile_taken_branch(Register mdp, Register bumped_count);   // Sets mdp, bumped_count registers, blows Rtemp.
+  void profile_not_taken_branch(Register mdp);                 // Sets mdp, blows Rtemp.
+
+  void profile_call(Register mdp);                             // Sets mdp, blows Rtemp.
+  void profile_final_call(Register mdp);                       // Sets mdp, blows Rtemp.
+  void profile_virtual_call(Register mdp, Register receiver,   // Sets mdp, blows Rtemp.
+                            bool receiver_can_be_null = false);
+  void profile_ret(Register mdp, Register return_bci);         // Sets mdp, blows R0-R3/R0-R18, Rtemp, LR
+  void profile_null_seen(Register mdp);                        // Sets mdp.
+  void profile_typecheck(Register mdp, Register klass);        // Sets mdp, blows Rtemp.
+
+  void profile_typecheck_failed(Register mdp);                 // Sets mdp, blows Rtemp.
+  void profile_switch_default(Register mdp);                   // Sets mdp, blows Rtemp.
+
+  // Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
+  void profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2);
+
+  void byteswap_u32(Register r, Register rtmp1, Register rtmp2); // Byte-reverses r in place; rtmp1/rtmp2 are used only on 32-bit ARM without rev.
+
+  void inc_global_counter(address address_of_counter, int offset_in_bytes, Register tmp1, Register tmp2, bool avoid_overflow); // Blows tmp1, tmp2; with avoid_overflow the store is skipped once the incremented value is negative.
+
+  typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+  // support for jvmti
+  void notify_method_entry();
+  void notify_method_exit(TosState state, NotifyMethodExitMode mode,
+                          bool native = false, Register result_lo = noreg, Register result_hi = noreg, FloatRegister result_fp = fnoreg);
+
+  void trace_state(const char* msg) PRODUCT_RETURN; // Non-product builds only: emits a printf of msg, FP and SP.
+
+  void get_method_counters(Register method, Register Rcounters, Label& skip); // Sets Rcounters (building the counters via the runtime if absent); branches to skip if none could be created.
+};
+
+#endif // CPU_ARM_VM_INTERP_MASM_ARM_HPP
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/interpreterRT_arm.cpp 2016-12-02 11:21:09.991275732 -0500
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#ifdef SHARING_FAST_NATIVE_FINGERPRINTS +// mapping from SignatureIterator param to (common) type of parsing +static const u1 shared_type[] = { + (u1) SignatureIterator::int_parm, // bool + (u1) SignatureIterator::int_parm, // byte + (u1) SignatureIterator::int_parm, // char + (u1) SignatureIterator::int_parm, // short + (u1) SignatureIterator::int_parm, // int + (u1) SignatureIterator::long_parm, // long +#ifndef __ABI_HARD__ + (u1) SignatureIterator::int_parm, // float, passed as int + (u1) SignatureIterator::long_parm, // double, passed as long +#else + (u1) SignatureIterator::float_parm, // float + (u1) SignatureIterator::double_parm, // double +#endif + (u1) SignatureIterator::obj_parm, // obj + (u1) SignatureIterator::done_parm // done +}; + +uint64_t InterpreterRuntime::normalize_fast_native_fingerprint(uint64_t fingerprint) { + if (fingerprint == UCONST64(-1)) { + // special signature used when the argument list cannot be encoded in a 64 bits value + return fingerprint; + } + int shift = SignatureIterator::static_feature_size; + uint64_t result = fingerprint & ((1 << shift) - 1); + fingerprint >>= shift; + + BasicType ret_type = (BasicType) (fingerprint & SignatureIterator::result_feature_mask); + // For ARM, the fast signature handler only needs to know whether + // the return value must be unboxed. 
T_OBJECT and T_ARRAY need not + // be distinguished from each other and all other return values + // behave like integers with respect to the handler. + bool unbox = (ret_type == T_OBJECT) || (ret_type == T_ARRAY); + if (unbox) { + ret_type = T_OBJECT; + } else { + ret_type = T_INT; + } + result |= ((uint64_t) ret_type) << shift; + shift += SignatureIterator::result_feature_size; + fingerprint >>= SignatureIterator::result_feature_size; + + while (true) { + uint32_t type = (uint32_t) (fingerprint & SignatureIterator::parameter_feature_mask); + if (type == SignatureIterator::done_parm) { + result |= ((uint64_t) SignatureIterator::done_parm) << shift; + return result; + } + assert((type >= SignatureIterator::bool_parm) && (type <= SignatureIterator::obj_parm), "check fingerprint encoding"); + int shared = shared_type[type - SignatureIterator::bool_parm]; + result |= ((uint64_t) shared) << shift; + shift += SignatureIterator::parameter_feature_size; + fingerprint >>= SignatureIterator::parameter_feature_size; + } +} +#endif // SHARING_FAST_NATIVE_FINGERPRINTS + +// Implementation of SignatureHandlerGenerator +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + if (_ireg < GPR_PARAMS) { + Register dst = as_Register(_ireg); + __ ldr_s32(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + _ireg++; + } else { + __ ldr_s32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str_32(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { +#ifdef AARCH64 + if (_ireg < GPR_PARAMS) { + Register dst = as_Register(_ireg); + __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); + _ireg++; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#else + if (_ireg <= 2) { +#if (ALIGN_WIDE_ARGUMENTS == 1) 
+ if ((_ireg & 1) != 0) { + // 64-bit values should be 8-byte aligned + _ireg++; + } +#endif + Register dst1 = as_Register(_ireg); + Register dst2 = as_Register(_ireg+1); + __ ldr(dst1, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); + __ ldr(dst2, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + _ireg += 2; +#if (ALIGN_WIDE_ARGUMENTS == 0) + } else if (_ireg == 3) { + // uses R3 + one stack slot + Register dst1 = as_Register(_ireg); + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ ldr(dst1, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _ireg += 1; + _abi_offset += 1; +#endif + } else { +#if (ALIGN_WIDE_ARGUMENTS == 1) + if(_abi_offset & 1) _abi_offset++; +#endif + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); + __ str(Rtemp, Address(SP, (_abi_offset) * wordSize)); + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str(Rtemp, Address(SP, (_abi_offset+1) * wordSize)); + _abi_offset += 2; + _ireg = 4; + } +#endif // AARCH64 +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { +#ifdef AARCH64 + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ cmp(Rtemp, 0); + __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset())); + if (_ireg < GPR_PARAMS) { + Register dst = as_Register(_ireg); + __ csel(dst, ZR, Rtemp, eq); + _ireg++; + } else { + __ csel(Rtemp, ZR, Rtemp, eq); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#else + if (_ireg < 4) { + Register dst = as_Register(_ireg); + __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ cmp(dst, 0); + __ sub(dst, Rlocals, -Interpreter::local_offset_in_bytes(offset()), ne); + _ireg++; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ 
cmp(Rtemp, 0); + __ sub(Rtemp, Rlocals, -Interpreter::local_offset_in_bytes(offset()), ne); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#endif // AARCH64 +} + +#ifndef __ABI_HARD__ +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + if (_ireg < 4) { + Register dst = as_Register(_ireg); + __ ldr(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + _ireg++; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +} + +#else +#ifndef __SOFTFP__ +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { +#ifdef AARCH64 + if (_freg < FPR_PARAMS) { + FloatRegister dst = as_FloatRegister(_freg); + __ ldr_s(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + _freg++; + } else { + __ ldr_u32(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str_32(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#else + if((_fp_slot < 16) || (_single_fpr_slot & 1)) { + if ((_single_fpr_slot & 1) == 0) { + _single_fpr_slot = _fp_slot; + _fp_slot += 2; + } + __ flds(as_FloatRegister(_single_fpr_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + _single_fpr_slot++; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#endif // AARCH64 +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { +#ifdef AARCH64 + if (_freg < FPR_PARAMS) { + FloatRegister dst = as_FloatRegister(_freg); + __ ldr_d(dst, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); + _freg++; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset() + 1))); + __ str(Rtemp, Address(SP, _abi_offset * wordSize)); + _abi_offset++; + } +#else + if(_fp_slot <= 14) { + __ 
fldd(as_FloatRegister(_fp_slot), Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); + _fp_slot += 2; + } else { + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()+1))); + __ str(Rtemp, Address(SP, (_abi_offset) * wordSize)); + __ ldr(Rtemp, Address(Rlocals, Interpreter::local_offset_in_bytes(offset()))); + __ str(Rtemp, Address(SP, (_abi_offset+1) * wordSize)); + _abi_offset += 2; + _single_fpr_slot = 16; + } +#endif // AARCH64 +} +#endif // __SOFTFP__ +#endif // __ABI_HARD__ + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + iterate(fingerprint); + + BasicType result_type = SignatureIterator::return_type(fingerprint); + + address result_handler = Interpreter::result_handler(result_type); + +#ifdef AARCH64 + __ mov_slow(R0, (address)result_handler); +#else + // Check that result handlers are not real handler on ARM (0 or -1). + // This ensures the signature handlers do not need symbolic information. + assert((result_handler == NULL)||(result_handler==(address)0xffffffff),""); + __ mov_slow(R0, (intptr_t)result_handler); +#endif + + __ ret(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + +class SlowSignatureHandler: public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + +#ifndef __ABI_HARD__ + virtual void pass_int() { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + } + + virtual void pass_float() { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + } + + virtual void pass_long() { +#if (ALIGN_WIDE_ARGUMENTS == 1) + if (((intptr_t)_to & 7) != 0) { + // 64-bit values should be 8-byte aligned + _to++; + } +#endif + _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + _to += 2; + 
_from -= 2*Interpreter::stackElementSize; + } + + virtual void pass_object() { + intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0)); + *_to++ = (*(intptr_t*)from_addr == 0) ? (intptr_t)NULL : from_addr; + _from -= Interpreter::stackElementSize; + } + +#else + + intptr_t* _toFP; + intptr_t* _toGP; + int _last_gp; + int _last_fp; +#ifndef AARCH64 + int _last_single_fp; +#endif // !AARCH64 + + virtual void pass_int() { + if(_last_gp < GPR_PARAMS) { + _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } else { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } + _from -= Interpreter::stackElementSize; + } + + virtual void pass_long() { +#ifdef AARCH64 + if(_last_gp < GPR_PARAMS) { + _toGP[_last_gp++] = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1)); + } else { + *_to++ = *(jlong *)(_from+Interpreter::local_offset_in_bytes(1)); + } +#else + assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); + if (_last_gp <= 2) { + if(_last_gp & 1) _last_gp++; + _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(1)); + _toGP[_last_gp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } else { + if (((intptr_t)_to & 7) != 0) { + // 64-bit values should be 8-byte aligned + _to++; + } + _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + _to += 2; + _last_gp = 4; + } +#endif // AARCH64 + _from -= 2*Interpreter::stackElementSize; + } + + virtual void pass_object() { + intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0)); + if(_last_gp < GPR_PARAMS) { + _toGP[_last_gp++] = (*(intptr_t*)from_addr == 0) ? NULL : from_addr; + } else { + *_to++ = (*(intptr_t*)from_addr == 0) ? 
NULL : from_addr; + } + _from -= Interpreter::stackElementSize; + } + + virtual void pass_float() { +#ifdef AARCH64 + if(_last_fp < FPR_PARAMS) { + _toFP[_last_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } else { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } +#else + if((_last_fp < 16) || (_last_single_fp & 1)) { + if ((_last_single_fp & 1) == 0) { + _last_single_fp = _last_fp; + _last_fp += 2; + } + + _toFP[_last_single_fp++] = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } else { + *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + } +#endif // AARCH64 + _from -= Interpreter::stackElementSize; + } + + virtual void pass_double() { +#ifdef AARCH64 + if(_last_fp < FPR_PARAMS) { + _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + } else { + *_to++ = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + } +#else + assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments"); + if(_last_fp <= 14) { + _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _toFP[_last_fp++] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + } else { + if (((intptr_t)_to & 7) != 0) { // 64-bit values should be 8-byte aligned + _to++; + } + _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + _to += 2; + _last_single_fp = 16; + } +#endif // AARCH64 + _from -= 2*Interpreter::stackElementSize; + } + +#endif // !__ABI_HARD__ + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) : + NativeSignatureIterator(method) { + _from = from; + +#ifdef __ABI_HARD__ + _toGP = to; + _toFP = _toGP + GPR_PARAMS; + _to = _toFP + AARCH64_ONLY(FPR_PARAMS) NOT_AARCH64(8*2); + _last_gp = (is_static() ? 2 : 1); + _last_fp = 0; +#ifndef AARCH64 + _last_single_fp = 0; +#endif // !AARCH64 +#else + _to = to + (is_static() ? 
2 : 1); +#endif // __ABI_HARD__ + } +}; + +IRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* thread, Method* method, intptr_t* from, intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + return Interpreter::result_handler(m->result_type()); +IRT_END --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/interpreterRT_arm.hpp 2016-12-02 11:21:15.307577211 -0500 @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_ARM_VM_INTERPRETERRT_ARM_HPP +#define CPU_ARM_VM_INTERPRETERRT_ARM_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + int _abi_offset; + int _ireg; + +#ifdef __ABI_HARD__ +#ifdef AARCH64 + int _freg; +#else + int _fp_slot; // number of FPR's with arguments loaded + int _single_fpr_slot; +#endif +#endif + + void move(int from_offset, int to_offset); + void box(int from_offset, int to_offset); + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_object(); +#ifdef __ABI_HARD__ + void pass_double(); +#endif + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _abi_offset = 0; + _ireg = is_static() ? 2 : 1; +#ifdef __ABI_HARD__ +#ifdef AARCH64 + _freg = 0; +#else + _fp_slot = 0; + _single_fpr_slot = 0; +#endif +#endif + } + + // Code generation + void generate(uint64_t fingerprint); + +}; + +#ifndef AARCH64 +// ARM provides a normalized fingerprint for native calls (to increase +// sharing). See normalize_fast_native_fingerprint +#define SHARING_FAST_NATIVE_FINGERPRINTS +#endif + +#endif // CPU_ARM_VM_INTERPRETERRT_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/javaFrameAnchor_arm.hpp 2016-12-02 11:21:20.931896161 -0500 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
#ifndef CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP
#define CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP

// ARM-specific members of JavaFrameAnchor. This header declares no class of
// its own: it starts with an access specifier and uses _last_Java_sp and
// _last_Java_pc, which are declared elsewhere.
// NOTE(review): presumably it is #included inside the body of class
// JavaFrameAnchor by the shared header - confirm.

private:

  // FP value associated with _last_Java_sp:
  intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to

public:
  // Each arch must define reset, save, restore
  // These are used by objects that only care about:
  //  1 - initializing a new state (thread creation, javaCalls)
  //  2 - saving a current state (javaCalls)
  //  3 - restoring an old state (javaCalls)

  // Resets the anchor. The store order matters: _last_Java_sp is cleared
  // before the other two fields.
  void clear(void) {
    // clearing _last_Java_sp must be first
    _last_Java_sp = NULL;
    // fence?
    _last_Java_fp = NULL;
    _last_Java_pc = NULL;
  }

  // Copies the anchor state from 'src'. _last_Java_sp is written last (and
  // cleared first when it is about to change) so that fp/pc are already in
  // place whenever sp is non-NULL.
  void copy(JavaFrameAnchor* src) {
    // In order to make sure the transition state is valid for "this"
    // We must clear _last_Java_sp before copying the rest of the new data
    //
    // Hack Alert: Temporary bugfix for 4717480/4721647
    // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
    // unless the value is changing
    //
    if (_last_Java_sp != src->_last_Java_sp)
      _last_Java_sp = NULL;

    _last_Java_fp = src->_last_Java_fp;
    _last_Java_pc = src->_last_Java_pc;
    // Must be last so profiler will always see valid frame if has_last_frame() is true
    _last_Java_sp = src->_last_Java_sp;
  }

  // Always walkable
  bool walkable(void) { return true; }
  // Never any thing to do since we are always walkable and can find address of return addresses
  void make_walkable(JavaThread* thread) { }

  // Plain accessors for the saved sp and pc.
  intptr_t* last_Java_sp(void) const { return _last_Java_sp; }

  address last_Java_pc(void) { return _last_Java_pc; }

private:

  // Byte offset of _last_Java_fp within JavaFrameAnchor.
  static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }

public:

  // Unconditional store; none of the ordering of clear()/copy() is applied.
  void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; }

  intptr_t* last_Java_fp(void) { return _last_Java_fp; }
  // Assert (last_Java_sp == NULL || fp == NULL)
  // (the condition above is documentation only; it is not checked here)
  void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; }

#endif // CPU_ARM_VM_JAVAFRAMEANCHOR_ARM_HPP
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "assembler_arm.inline.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
#include "runtime/safepoint.hpp"

// Shorthand: "__ insn(...)" emits through the local MacroAssembler 'masm'.
#define __ masm->

// Size in bytes of the BufferBlob allocated for each generated accessor;
// checked with a guarantee() once the code has been emitted.
#define BUFFER_SIZE 96

// Generates the fast-path accessor stub for a primitive field of the given
// BasicType and returns its entry address. Despite the name, this one
// generator serves every primitive type, including long, float and double.
//
// Shape of the emitted code:
//   1. read the safepoint counter; if bit 0 is set, go to the slow case
//   2. load the object from the handle in R1 and form the field address
//      as object + (R2 >> 2)
//   3. load the field (the pc of this load is recorded in
//      speculative_load_pclist)
//   4. read the safepoint counter again; if it is unchanged, return the
//      value, otherwise fall into the slow case, which jumps to the regular
//      jni_Get<Type>Field implementation with the original arguments.
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
  const char* name = NULL;
  address slow_case_addr = NULL;
  // Pick the blob name and the slow-path entry matching the field type.
  switch (type) {
    case T_BOOLEAN:
      name = "jni_fast_GetBooleanField";
      slow_case_addr = jni_GetBooleanField_addr();
      break;
    case T_BYTE:
      name = "jni_fast_GetByteField";
      slow_case_addr = jni_GetByteField_addr();
      break;
    case T_CHAR:
      name = "jni_fast_GetCharField";
      slow_case_addr = jni_GetCharField_addr();
      break;
    case T_SHORT:
      name = "jni_fast_GetShortField";
      slow_case_addr = jni_GetShortField_addr();
      break;
    case T_INT:
      name = "jni_fast_GetIntField";
      slow_case_addr = jni_GetIntField_addr();
      break;
    case T_LONG:
      name = "jni_fast_GetLongField";
      slow_case_addr = jni_GetLongField_addr();
      break;
    case T_FLOAT:
      name = "jni_fast_GetFloatField";
      slow_case_addr = jni_GetFloatField_addr();
      break;
    case T_DOUBLE:
      name = "jni_fast_GetDoubleField";
      slow_case_addr = jni_GetDoubleField_addr();
      break;
    default:
      ShouldNotReachHere();
  }

  // R0 - jni env
  // R1 - object handle
  // R2 - jfieldID

  // On 32-bit ARM several of these names alias each other and the incoming
  // argument registers; the assert_different_registers() calls below spell
  // out which combinations must stay distinct.
  const Register Rsafepoint_counter_addr = AARCH64_ONLY(R4) NOT_AARCH64(R3);
  const Register Robj = AARCH64_ONLY(R5) NOT_AARCH64(R1);
  const Register Rres = AARCH64_ONLY(R6) NOT_AARCH64(R0);
#ifndef AARCH64
  const Register Rres_hi = R1;
#endif // !AARCH64
  const Register Rsafept_cnt = Rtemp;
  const Register Rsafept_cnt2 = Rsafepoint_counter_addr;
  const Register Rtmp1 = AARCH64_ONLY(R7) NOT_AARCH64(R3); // same as Rsafepoint_counter_addr on 32-bit ARM
  const Register Rtmp2 = AARCH64_ONLY(R8) NOT_AARCH64(R2); // same as jfieldID on 32-bit ARM

#ifdef AARCH64
  assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, Rtmp1, Rtmp2, R0, R1, R2, LR);
  assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, R0, R1, R2, LR);
#else
  assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Robj, Rres, LR);
  assert_different_registers(Rsafept_cnt, R1, R2, Rtmp1, LR);
  assert_different_registers(Rsafepoint_counter_addr, Rsafept_cnt, Rres, Rres_hi, Rtmp2, LR);
  assert_different_registers(Rsafept_cnt2, Rsafept_cnt, Rres, Rres_hi, LR);
#endif // AARCH64

  address fast_entry;

  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  fast_entry = __ pc();

  // Safepoint check
  InlinedAddress safepoint_counter_addr(SafepointSynchronize::safepoint_counter_addr());
  Label slow_case;
  __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);

#ifndef AARCH64
  __ push(RegisterSet(R0, R3));  // save incoming arguments for slow case
#endif // !AARCH64

  // First counter read; bit 0 set sends us to the slow case.
  __ ldr_s32(Rsafept_cnt, Address(Rsafepoint_counter_addr));
  __ tbnz(Rsafept_cnt, 0, slow_case);

  if (os::is_MP()) {
    // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
    // (bit 0 was just found clear, so Rtmp1 is 0 and the address is still R1)
    __ andr(Rtmp1, Rsafept_cnt, (unsigned)1);
    __ ldr(Robj, Address(R1, Rtmp1));
  } else {
    __ ldr(Robj, Address(R1));
  }

  // Field address = object + (jfieldID >> 2).
#ifdef AARCH64
  __ add(Robj, Robj, AsmOperand(R2, lsr, 2));
  Address field_addr = Address(Robj);
#else
  Address field_addr;
  if (type != T_BOOLEAN
      && type != T_INT
#ifndef __ABI_HARD__
      && type != T_FLOAT
#endif // !__ABI_HARD__
      ) {
    // Only ldr and ldrb support embedded shift, other loads do not
    __ add(Robj, Robj, AsmOperand(R2, lsr, 2));
    field_addr = Address(Robj);
  } else {
    field_addr = Address(Robj, R2, lsr, 2);
  }
#endif // AARCH64
  // Record the pc of the field load that follows.
  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();

  // The field load itself, sized and sign-/zero-extended per type. Under the
  // soft-float ABI float/double share the integer paths; under the hard-float
  // ABI they are loaded into S0/D0.
  switch (type) {
    case T_BOOLEAN:
      __ ldrb(Rres, field_addr);
      break;
    case T_BYTE:
      __ ldrsb(Rres, field_addr);
      break;
    case T_CHAR:
      __ ldrh(Rres, field_addr);
      break;
    case T_SHORT:
      __ ldrsh(Rres, field_addr);
      break;
    case T_INT:
#ifndef __ABI_HARD__
    case T_FLOAT:
#endif
      __ ldr_s32(Rres, field_addr);
      break;
    case T_LONG:
#ifndef __ABI_HARD__
    case T_DOUBLE:
#endif
#ifdef AARCH64
      __ ldr(Rres, field_addr);
#else
      // Safe to use ldrd since long and double fields are 8-byte aligned
      __ ldrd(Rres, field_addr);
#endif // AARCH64
      break;
#ifdef __ABI_HARD__
    case T_FLOAT:
      __ ldr_float(S0, field_addr);
      break;
    case T_DOUBLE:
      __ ldr_double(D0, field_addr);
      break;
#endif // __ABI_HARD__
    default:
      ShouldNotReachHere();
  }

  // Second counter read. On MP the read is made address-dependent on the
  // loaded value: Rtmp2 = Rres ^ Rres is always 0 but is computed from Rres.
  if(os::is_MP()) {
      // Address dependency restricts memory access ordering. It's cheaper than explicit LoadLoad barrier
#if defined(__ABI_HARD__) && !defined(AARCH64)
    if (type == T_FLOAT || type == T_DOUBLE) {
      // The counter address register is reloaded here because it aliases
      // Rtmp1, which was overwritten above.
      __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
      // NOTE(review): fmrrd presumably copies D0 into Rres/Rres_hi so that
      // the dependency below can be formed on the FP result - confirm.
      __ fmrrd(Rres, Rres_hi, D0);
      __ eor(Rtmp2, Rres, Rres);
      __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2));
    } else
#endif // __ABI_HARD__ && !AARCH64
    {
#ifndef AARCH64
      __ ldr_literal(Rsafepoint_counter_addr, safepoint_counter_addr);
#endif // !AARCH64
      __ eor(Rtmp2, Rres, Rres);
      __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr, Rtmp2));
    }
  } else {
    __ ldr_s32(Rsafept_cnt2, Address(Rsafepoint_counter_addr));
  }
  // Counter unchanged -> the loaded value is returned.
  __ cmp(Rsafept_cnt2, Rsafept_cnt);
#ifdef AARCH64
  __ b(slow_case, ne);
  __ mov(R0, Rres);
  __ ret();
#else
  // discards saved R0 R1 R2 R3
  __ add(SP, SP, 4 * wordSize, eq);
  __ bx(LR, eq);
#endif // AARCH64

  // Record where the slow-case code of this stub begins and advance the
  // shared stub counter.
  slowcase_entry_pclist[count++] = __ pc();

  __ bind(slow_case);
#ifndef AARCH64
  __ pop(RegisterSet(R0, R3));
#endif // !AARCH64
  // thumb mode switch handled by MacroAssembler::jump if needed
  __ jump(slow_case_addr, relocInfo::none, Rtemp);

  // The literal holding the safepoint counter address is placed after the code.
  __ bind_literal(safepoint_counter_addr);

  __ flush();

  guarantee((__ pc() - fast_entry) <= BUFFER_SIZE, "BUFFER_SIZE too small");

  return fast_entry;
}

// Never called on this platform: float and double accessors are generated by
// generate_fast_get_int_field0() (see the wrappers below).
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
  ShouldNotReachHere();
  return NULL;
}

// Per-type entry points; each one forwards to generate_fast_get_int_field0().

address JNI_FastGetField::generate_fast_get_boolean_field() {
  return generate_fast_get_int_field0(T_BOOLEAN);
}

address JNI_FastGetField::generate_fast_get_byte_field() {
  return generate_fast_get_int_field0(T_BYTE);
}

address JNI_FastGetField::generate_fast_get_char_field() {
  return generate_fast_get_int_field0(T_CHAR);
}

address JNI_FastGetField::generate_fast_get_short_field() {
  return generate_fast_get_int_field0(T_SHORT);
}

address JNI_FastGetField::generate_fast_get_int_field() {
  return generate_fast_get_int_field0(T_INT);
}

address JNI_FastGetField::generate_fast_get_long_field() {
  return generate_fast_get_int_field0(T_LONG);
}

address JNI_FastGetField::generate_fast_get_float_field() {
  return generate_fast_get_int_field0(T_FLOAT);
}

address JNI_FastGetField::generate_fast_get_double_field() {
  return generate_fast_get_int_field0(T_DOUBLE);
}
+ * + */ + +#ifndef CPU_ARM_VM_JNITYPES_ARM_HPP +#define CPU_ARM_VM_JNITYPES_ARM_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_arm.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + +#ifndef AARCH64 + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } +#endif + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + +#ifdef AARCH64 + // Longs are stored in native format in one JavaCallArgument slot at *(to+1). 
+ static inline void put_long(jlong from, intptr_t *to) { *(jlong *)(to + 1 + 0) = from; } + static inline void put_long(jlong from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = from; pos += 2; } + static inline void put_long(jlong *from, intptr_t *to, int& pos) { *(jlong *)(to + 1 + pos) = *from; pos += 2; } +#else + // Longs are stored in big-endian word format in two JavaCallArgument slots at *to. + // The high half is in *to and the low half in *(to+1). + static inline void put_long(jlong from, intptr_t *to) { put_int2r((jint *)&from, to); } + static inline void put_long(jlong from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } + static inline void put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } +#endif + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#ifdef AARCH64 + // Doubles are stored in native word format in one JavaCallArgument slot at *(to+1). 
+ static inline void put_double(jdouble from, intptr_t *to) { *(jdouble *)(to + 1 + 0) = from; } + static inline void put_double(jdouble from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = from; pos += 2; } + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { *(jdouble *)(to + 1 + pos) = *from; pos += 2; } +#else + // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to. + // The high half is in *to and the low half in *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { put_int2r((jint *)&from, to); } + static inline void put_double(jdouble from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } +#endif + +}; + +#endif // CPU_ARM_VM_JNITYPES_ARM_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/jni_arm.h 2016-12-02 11:21:40.565009537 -0500 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
#ifndef _JAVASOFT_JNI_MD_H_
#define _JAVASOFT_JNI_MD_H_

// Platform-dependent part of the JNI header: export/import/calling-convention
// macros and the basic integer typedefs.

// Note: please do not change these without also changing jni_md.h in the JDK
// repository

// Fallback for compilers without __has_attribute: every query answers "no",
// so only the GCC version test below can enable the attributes.
#ifndef __has_attribute
  #define __has_attribute(x) 0
#endif
// The attributes are used with GCC newer than 4.2, or with any compiler that
// reports support for the 'visibility' attribute; otherwise the macros expand
// to nothing.
#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility)
  #define JNIEXPORT     __attribute__((externally_visible,visibility("default")))
  #define JNIIMPORT     __attribute__((externally_visible,visibility("default")))
#else
  #define JNIEXPORT
  #define JNIIMPORT
#endif

// No calling-convention decoration on this platform.
#define JNICALL

typedef int jint;
// jlong is 'long' when _LP64 is defined and 'long long' otherwise.
#if defined(_LP64)
  typedef long jlong;
#else
  typedef long long jlong;
#endif
typedef signed char jbyte;

#endif /* !_JAVASOFT_JNI_MD_H_ */
#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_arm.inline.hpp"

// Platform-dependent hooks of the JVMCI CodeInstaller for ARM. None of them
// is implemented in this file: every pd_* hook calls Unimplemented().
// The two register queries at the end return a fixed value instead.

// Not implemented; the return statement only satisfies the signature.
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
  Unimplemented();
  return 0;
}

// Not implemented.
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
  Unimplemented();
}

// Not implemented.
void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
  Unimplemented();
}

// Not implemented.
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
  Unimplemented();
}

// Not implemented.
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
  Unimplemented();
}

// Not implemented.
void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
  Unimplemented();
}

// Not implemented.
void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
  Unimplemented();
}

// convert JVMCI register indices (as used in oop maps) to HotSpot registers
// NOTE(review): unlike the hooks above this returns NULL without calling
// Unimplemented() - confirm that callers tolerate a NULL VMReg.
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
  return NULL;
}

// Always answers false, whatever register is passed in.
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
  return false;
}
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "ci/ciEnv.hpp" +#include "code/nativeInst.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif + +// Implementation of AddressLiteral + +void AddressLiteral::set_rspec(relocInfo::relocType rtype) { + switch (rtype) { + case 
relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(_target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(_target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + break; + default: + ShouldNotReachHere(); + break; + } +} + +// Initially added to the Assembler interface as a pure virtual: +// RegisterConstant delayed_value(..) 
+// for:
+// 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
+// this was subsequently modified to its present name and return type; in this file the body is just ShouldNotReachHere()
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  ShouldNotReachHere();
+  return RegisterOrConstant(-1);
+}
+
+// breakpoint: emits brk(); for a condition other than 'al' the brk() is skipped by a branch taken on inverse(cond).
+#ifdef AARCH64
+// Note: ARM32 version is OS dependent
+void MacroAssembler::breakpoint(AsmCondition cond) {
+  if (cond == al) {
+    brk();
+  } else {
+    Label L;
+    b(L, inverse(cond)); // jump over the brk() when cond does not hold
+    brk();
+    bind(L);
+  }
+}
+#endif // AARCH64
+
+
+// virtual method calling: loads into method_result the method of the vtable entry selected by vtable_index (recv_klass is overwritten)
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           Register vtable_index,
+                                           Register method_result) {
+  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
+  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); // recv_klass += vtable_index << LogBytesPerWord
+  ldr(method_result, Address(recv_klass, base_offset));
+}
+
+
+// Simplified, combined version, good for typical uses.
+// Falls through on failure.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp_reg2,
+                                         Register temp_reg3,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); // NULL slow-path label: continue into the slow path below
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); // NULL failure label: fall through on failure
+  bind(L_failure); // fast-path failures land here
+};
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp_reg2,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path) {
+  // A NULL label means "fall through" for that outcome; at most one of the three labels may be NULL (asserted below).
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
+  const Register super_check_offset = temp_reg2;
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(sub_klass, super_klass);
+  b(*L_success, eq);
+
+  // Check the supertype display:
+  ldr_u32(super_check_offset, super_check_offset_addr);
+
+  Address super_check_addr(sub_klass, super_check_offset);
+  ldr(temp_reg, super_check_addr); // load displayed supertype
+  cmp(super_klass, temp_reg); // compare it with super_klass
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  b(*L_success, eq);
+  cmp_32(super_check_offset, sc_offset); // eq: the entry just checked was the secondary super cache
+  if (L_failure == &L_fallthrough) {
+    b(*L_slow_path, eq);
+  } else {
+    b(*L_failure, ne);
+    if (L_slow_path != &L_fallthrough) {
+      b(*L_slow_path);
+    }
+  }
+
+  bind(L_fallthrough);
+}
+
+// Slow path: scans the secondary supers array of sub_klass for super_klass; on a hit, stores super_klass in the secondary super cache and branches to L_success.
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Register temp3_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+#ifdef AARCH64
+  NOT_IMPLEMENTED();
+#else
+  // Note: if used by code that expects a register to be 0 on success,
+  // this register must be temp_reg and set_cond_codes must be true
+
+  Register saved_reg = noreg;
+
+  // get additional tmp registers
+  if (temp3_reg == noreg) {
+    saved_reg = temp3_reg = LR; // LR is used as the third temp and must be restored on every exit
+    push(saved_reg);
+  }
+
+  assert(temp2_reg != noreg, "need all the temporary registers");
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
+
+  Register cmp_temp = temp_reg;
+  Register scan_temp = temp3_reg;
+  Register count_temp = temp2_reg;
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr( sub_klass, sc_offset);
+
+#ifndef PRODUCT
+  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ldr(scan_temp, Address(sub_klass, ss_offset));
+
+  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
+  // else search_key is the super_klass register itself
+  Register search_key = super_klass;
+
+  // Load the array length.
+  ldr(count_temp, Address(scan_temp, Array::length_offset_in_bytes()));
+  add(scan_temp, scan_temp, Array::base_offset_in_bytes());
+
+  add(count_temp, count_temp, 1);
+
+  Label L_loop, L_setnz_and_fail, L_fail;
+
+  // Top of search loop
+  bind(L_loop);
+  // Notes:
+  // scan_temp starts at the array elements
+  // count_temp is 1+size
+  subs(count_temp, count_temp, 1);
+  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
+    // direct jump to L_failure if failed and no cleanup needed
+    b(*L_failure, eq); // not found and no cleanup needed
+  } else {
+    b(L_fail, eq); // not found in the array
+  }
+
+  // Load next super to check
+  // In the array of super classes elements are pointer sized.
+  int element_size = wordSize;
+  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  subs(cmp_temp, cmp_temp, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  b(L_loop, ne);
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+
+  // Note: temp_reg/cmp_temp is already 0 and flag Z is set
+
+  // Success. Cache the super we found and proceed in triumph.
+  str(super_klass, Address(sub_klass, sc_offset));
+
+  if (saved_reg != noreg) {
+    // Return success
+    pop(saved_reg);
+  }
+
+  b(*L_success);
+
+  bind(L_fail);
+  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
+  if (set_cond_codes) {
+    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
+  }
+  if (saved_reg != noreg) {
+    pop(saved_reg);
+  }
+  if (L_failure != &L_fallthrough) {
+    b(*L_failure);
+  }
+
+  bind(L_fallthrough);
+#endif // AARCH64
+}
+
+// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
+Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
+  assert_different_registers(params_base, params_count);
+  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
+  return Address(tmp, -Interpreter::stackElementSize); // one stack element below params_base + (params_count << logStackElementSize)
+}
+
+// align: emits nops until offset() is a multiple of modulus.
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) {
+    nop();
+  }
+}
+
+int MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                        Register last_java_fp,
+                                        bool save_last_java_pc,
+                                        Register tmp) {
+  int pc_offset; // returned to the caller; -1 when the PC is not saved
+  if (last_java_fp != noreg) {
+    // optional
+    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
+    _fp_saved = true;
+  } else {
+    _fp_saved = false;
+  }
+  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
+#ifdef AARCH64
+    pc_offset = mov_pc_to(tmp);
+    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
+#else
+    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
+    pc_offset = offset() + VM_Version::stored_pc_adjustment();
+#endif
+    _pc_saved = true;
+  } else {
+    _pc_saved = false;
+    pc_offset = -1;
+  }
+  // According to the comment in javaFrameAnchor, SP must be saved last, so that other
+  // entries are valid when SP is set.
+
+  // However, this is probably not a strong constraint since for instance PC is
+  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
+  // we now write the fields in the expected order but we have not added a StoreStore
+  // barrier.
+
+  // XXX: if the ordering is really important, PC should always be saved (without forgetting
+  // to update oop_map offsets) and a StoreStore barrier might be needed.
+
+  if (last_java_sp == noreg) {
+    last_java_sp = SP; // always saved
+  }
+#ifdef AARCH64
+  if (last_java_sp == SP) {
+    mov(tmp, SP); // SP is copied to tmp first and stored from there
+    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  } else {
+    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  }
+#else
+  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+  return pc_offset; // for oopmaps
+}
+
+void MacroAssembler::reset_last_Java_frame(Register tmp) { // zeroes last_Java_sp, plus fp/pc when set_last_Java_frame stored them
+  const Register Rzero = zero_register(tmp);
+  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  if (_fp_saved) {
+    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
+  }
+  if (_pc_saved) {
+    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
+  }
+}
+
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
+  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
+
+#ifndef AARCH64
+  // Safer to save R9 here since callers may have been written
+  // assuming R9 survives. This is suboptimal but is not worth
+  // optimizing for the few platforms where R9 is scratched.
+  push(RegisterSet(R4) | R9ifScratched);
+  mov(R4, SP); // keep the pre-alignment SP in R4; it is restored after the call
+  bic(SP, SP, StackAlignmentInBytes - 1); // clear the low SP bits
+#endif // AARCH64
+  call(entry_point, relocInfo::runtime_call_type);
+#ifndef AARCH64
+  mov(SP, R4);
+  pop(RegisterSet(R4) | R9ifScratched);
+#endif // AARCH64
+}
+
+// Common code of the call_VM variants: records the last Java frame, calls entry_point with Rthread in R0, then optionally forwards a pending exception and fetches the oop result.
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
+
+  const Register tmp = Rtemp;
+  assert_different_registers(oop_result, tmp);
+
+  set_last_Java_frame(SP, FP, true, tmp);
+
+#ifdef ASSERT
+  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
+#endif // ASSERT
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+  // Safer to save R9 here since callers may have been written
+  // assuming R9 survives. This is suboptimal but is not worth
+  // optimizing for the few platforms where R9 is scratched.
+
+  // Note: cannot save R9 above the saved SP (some calls expect for
+  // instance the Java stack top at the saved SP)
+  // => once saved (with set_last_Java_frame), decrease SP before rounding to
+  // ensure the slot at SP will be free for R9).
+  sub(SP, SP, 4);
+  bic(SP, SP, StackAlignmentInBytes - 1);
+  str(R9, Address(SP, 0));
+#else
+  bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // R9_IS_SCRATCHED
+#endif
+
+  mov(R0, Rthread);
+  call(entry_point, relocInfo::runtime_call_type);
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+  ldr(R9, Address(SP, 0));
+#endif
+  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); // reload SP from the value stored by set_last_Java_frame
+#endif
+
+  reset_last_Java_frame(tmp);
+
+  // C++ interp handles this in the interpreter
+  check_and_handle_popframe();
+  check_and_handle_earlyret();
+
+  if (check_exceptions) {
+    // check for pending exceptions
+    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
+#ifdef AARCH64
+    Label L;
+    cbz(tmp, L);
+    mov_pc_to(Rexception_pc);
+    b(StubRoutines::forward_exception_entry());
+    bind(L);
+#else
+    cmp(tmp, 0);
+    mov(Rexception_pc, PC, ne);
+    b(StubRoutines::forward_exception_entry(), ne);
+#endif // AARCH64
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, tmp);
+  }
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+// The variants taking arguments only assert that each one already sits in its fixed register (R1, R2, R3).
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  assert (arg_2 == R2, "fixed register for arg_2");
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  assert (arg_2 == R2, "fixed register for arg_2");
+  assert (arg_3 == R3, "fixed register for arg_3");
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+// Raw call, without saving/restoring registers, exception handling, etc.
+// Mainly used from various stubs.
+void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
+  const Register tmp = Rtemp; // Rtemp free since scratched by call
+  set_last_Java_frame(SP, FP, true, tmp);
+#if R9_IS_SCRATCHED
+  if (save_R9_if_scratched) {
+    // Note: Saving also R10 for alignment.
+    push(RegisterSet(R9, R10));
+  }
+#endif
+  mov(R0, Rthread);
+  call(entry_point, relocInfo::runtime_call_type);
+#if R9_IS_SCRATCHED
+  if (save_R9_if_scratched) {
+    pop(RegisterSet(R9, R10));
+  }
+#endif
+  reset_last_Java_frame(tmp);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+  call_VM_leaf_helper(entry_point, 0);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  call_VM_leaf_helper(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  call_VM_leaf_helper(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  assert (arg_3 == R2, "fixed register for arg_3");
+  call_VM_leaf_helper(entry_point, 3);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  assert (arg_3 == R2, "fixed register for arg_3");
+  assert (arg_4 == R3, "fixed register for arg_4");
+  call_VM_leaf_helper(entry_point, 4);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
+  assert_different_registers(oop_result, tmp);
+  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
+  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); // clear the thread's slot after reading it
+  verify_oop(oop_result);
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
+  assert_different_registers(metadata_result, tmp);
+  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
+  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); // clear the thread's slot after reading it
+}
+
+void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
+  if (arg2.is_register()) {
+    add(dst, arg1, arg2.as_register());
+  } else {
+    add(dst, arg1, arg2.as_constant());
+  }
+}
+
+void MacroAssembler::add_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+  if (c == 0) {
+    if (rd != rn) {
+      mov(rd, rn);
+    }
+    return;
+  }
+  if (c < 0) {
+    sub_slow(rd, rn, -c);
+    return;
+  }
+  if (c > right_n_bits(24)) {
+    guarantee(rd != rn, "no large add_slow with only one register");
+    mov_slow(rd, c);
+    add(rd, rn, rd);
+  } else {
+    int lo = c & right_n_bits(12); // low 12 bits of c
+    int hi = (c >> 12) & right_n_bits(12); // next 12 bits of c, added with lsl12
+    if (lo != 0) {
+      add(rd, rn, lo, lsl0);
+    }
+    if (hi != 0) {
+      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
+    }
+  }
+#else
+  // This function is used in compiler for handling large frame offsets
+  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+    return sub(rd, rn, (-c));
+  }
+  int low = c & 0x3fc; // bits 2..9 of c
+  if (low != 0) {
+    add(rd, rn, low);
+    rn = rd;
+  }
+  if (c & ~0x3fc) {
+    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
+    add(rd, rn, c & ~0x3fc);
+  } else if (rd != rn) {
+    assert(c == 0, "");
+    mov(rd, rn); // need to generate at least one move!
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+  if (c <= 0) {
+    add_slow(rd, rn, -c);
+    return;
+  }
+  if (c > right_n_bits(24)) {
+    guarantee(rd != rn, "no large sub_slow with only one register");
+    mov_slow(rd, c);
+    sub(rd, rn, rd);
+  } else {
+    int lo = c & right_n_bits(12); // low 12 bits of c
+    int hi = (c >> 12) & right_n_bits(12); // next 12 bits of c, subtracted with lsl12
+    if (lo != 0) {
+      sub(rd, rn, lo, lsl0);
+    }
+    if (hi != 0) {
+      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
+    }
+  }
+#else
+  // This function is used in compiler for handling large frame offsets
+  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+    return add(rd, rn, (-c));
+  }
+  int low = c & 0x3fc; // bits 2..9 of c
+  if (low != 0) {
+    sub(rd, rn, low);
+    rn = rd;
+  }
+  if (c & ~0x3fc) {
+    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
+    sub(rd, rn, c & ~0x3fc);
+  } else if (rd != rn) {
+    assert(c == 0, "");
+    mov(rd, rn); // need to generate at least one move!
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_slow(Register rd, address addr) {
+  // do *not* call the non relocated mov_related_address
+  mov_slow(rd, (intptr_t)addr);
+}
+
+void MacroAssembler::mov_slow(Register rd, const char *str) {
+  mov_slow(rd, (intptr_t)str);
+}
+
+#ifdef AARCH64
+
+// Common code for mov_slow and instr_count_for_mov_slow.
+// Returns number of instructions of mov_slow pattern,
+// generating it if non-null MacroAssembler is given.
+int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
+  // This code pattern is matched in NativeInstruction::is_mov_slow.
+  // Update it at modifications.
+
+  const intx mask = right_n_bits(16);
+  // 1 movz instruction
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    if ((c & ~(mask << base_shift)) == 0) {
+      if (masm != NULL) {
+        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
+      }
+      return 1;
+    }
+  }
+  // 1 movn instruction
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    if (((~c) & ~(mask << base_shift)) == 0) {
+      if (masm != NULL) {
+        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
+      }
+      return 1;
+    }
+  }
+  // 1 orr instruction
+  {
+    LogicalImmediate imm(c, false);
+    if (imm.is_encoded()) {
+      if (masm != NULL) {
+        masm->orr(rd, ZR, imm);
+      }
+      return 1;
+    }
+  }
+  // 1 movz/movn + up to 3 movk instructions
+  int zeroes = 0; // number of 16-bit parts equal to 0
+  int ones = 0; // number of 16-bit parts equal to mask
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    int part = (c >> base_shift) & mask;
+    if (part == 0) {
+      ++zeroes;
+    } else if (part == mask) {
+      ++ones;
+    }
+  }
+  int def_bits = 0; // the more frequent of the two part values; parts equal to it need no instruction
+  if (ones > zeroes) {
+    def_bits = mask;
+  }
+  int inst_count = 0;
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    int part = (c >> base_shift) & mask;
+    if (part != def_bits) {
+      if (masm != NULL) {
+        if (inst_count > 0) {
+          masm->movk(rd, part, base_shift);
+        } else {
+          if (def_bits == 0) {
+            masm->movz(rd, part, base_shift);
+          } else {
+            masm->movn(rd, ~part & mask, base_shift);
+          }
+        }
+      }
+      inst_count++;
+    }
+  }
+  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
+  return inst_count;
+}
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c) {
+#ifdef ASSERT
+  int off = offset();
+#endif
+  (void) mov_slow_helper(rd, c, this);
+  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
+}
+
+// Counts instructions generated by mov_slow(rd, c).
+int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
+  return mov_slow_helper(noreg, c, NULL); // NULL masm: count only, nothing is emitted
+}
+
+int MacroAssembler::instr_count_for_mov_slow(address c) {
+  return mov_slow_helper(noreg, (intptr_t)c, NULL);
+}
+
+#else // !AARCH64
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
+  if (AsmOperand::is_rotated_imm(c)) {
+    mov(rd, c, cond);
+  } else if (AsmOperand::is_rotated_imm(~c)) {
+    mvn(rd, ~c, cond);
+  } else if (VM_Version::supports_movw()) {
+    movw(rd, c & 0xffff, cond);
+    if ((unsigned int)c >> 16) {
+      movt(rd, (unsigned int)c >> 16, cond);
+    }
+  } else {
+    // Find first non-zero bit
+    int shift = 0;
+    while ((c & (3 << shift)) == 0) {
+      shift += 2;
+    }
+    // Put the least significant part of the constant
+    int mask = 0xff << shift;
+    mov(rd, c & mask, cond);
+    // Add up to 3 other parts of the constant;
+    // each of them can be represented as rotated_imm
+    if (c & (mask << 8)) {
+      orr(rd, rd, c & (mask << 8), cond);
+    }
+    if (c & (mask << 16)) {
+      orr(rd, rd, c & (mask << 16), cond);
+    }
+    if (c & (mask << 24)) {
+      orr(rd, rd, c & (mask << 24), cond);
+    }
+  }
+}
+
+#endif // AARCH64
+
+void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
+#ifdef AARCH64
+                             bool patchable
+#else
+                             AsmCondition cond
+#endif
+                             ) {
+  // A NULL oop is loaded as zero and returns before any relocation is recorded.
+  if (o == NULL) {
+#ifdef AARCH64
+    if (patchable) {
+      nop();
+    }
+    mov(rd, ZR);
+#else
+    mov(rd, 0, cond);
+#endif
+    return;
+  }
+
+  if (oop_index == 0) {
+    oop_index = oop_recorder()->allocate_oop_index(o); // index 0 means "not allocated yet"
+  }
+  relocate(oop_Relocation::spec(oop_index));
+
+#ifdef AARCH64
+  if (patchable) {
+    nop();
+  }
+  ldr(rd, pc());
+#else
+  if (VM_Version::supports_movw()) {
+    movw(rd, 0, cond); // emitted with a zero immediate
+    movt(rd, 0, cond);
+  } else {
+    ldr(rd, Address(PC), cond);
+    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
+    nop();
+  }
+#endif
+}
+
+void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
+  if (o == NULL) { // NULL metadata is loaded as zero and returns before any relocation is recorded
+#ifdef AARCH64
+    if (patchable) {
+      nop();
+    }
+#endif
+    mov(rd, 0);
+    return;
+  }
+
+  if (metadata_index == 0) {
+    metadata_index = oop_recorder()->allocate_metadata_index(o); // index 0 means "not allocated yet"
+  }
+  relocate(metadata_Relocation::spec(metadata_index));
+
+#ifdef AARCH64
+  if (patchable) {
+    nop();
+  }
+#ifdef COMPILER2
+  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
+    mov_slow(rd, (address)o);
+    return;
+  }
+#endif
+  ldr(rd, pc());
+#else
+  if (VM_Version::supports_movw()) {
+    movw(rd, ((int)o) & 0xffff); // low 16 bits of the pointer
+    movt(rd, (unsigned int)o >> 16); // high 16 bits of the pointer
+  } else {
+    ldr(rd, Address(PC));
+    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
+    nop();
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
+  Label skip_constant;
+  union {
+    jfloat f;
+    jint i;
+  } accessor; // used to emit the bits of c as a 32-bit integer
+  accessor.f = c;
+
+#ifdef AARCH64
+  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
+  Label L;
+  ldr_s(fd, target(L));
+  b(skip_constant);
+  bind(L);
+  emit_int32(accessor.i);
+  bind(skip_constant);
+#else
+  flds(fd, Address(PC), cond);
+  b(skip_constant); // the constant is emitted inline right after this branch and jumped over
+  emit_int32(accessor.i);
+  bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
+  Label skip_constant;
+  union {
+    jdouble d;
+    jint i[2];
+  } accessor; // used to emit the bits of c as two 32-bit integers
+  accessor.d = c;
+
+#ifdef AARCH64
+  // TODO-AARCH64 - try to optimize loading of double constants with fmov
+  Label L;
+  ldr_d(fd, target(L));
+  b(skip_constant);
+  align(wordSize);
+  bind(L);
+  emit_int32(accessor.i[0]);
+  emit_int32(accessor.i[1]);
+  bind(skip_constant);
+#else
+  fldd(fd, Address(PC), cond);
+  b(skip_constant); // the constant is emitted inline right after this branch and jumped over
+  emit_int32(accessor.i[0]);
+  emit_int32(accessor.i[1]);
+  bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
+  intptr_t addr = (intptr_t) address_of_global;
+#ifdef AARCH64
+  assert((addr & 0x3) == 0, "address should be aligned");
+
+  // FIXME: TODO
+  if (false && page_reachable_from_cache(address_of_global)) {
+    assert(false,"TODO: relocate");
+    //relocate();
+    adrp(reg, address_of_global);
+    ldrsw(reg, Address(reg, addr & 0xfff));
+  } else {
+    mov_slow(reg, addr & ~0x3fff); // high part of the address; the low 14 bits become the load displacement
+    ldrsw(reg, Address(reg, addr & 0x3fff));
+  }
+#else
+  mov_slow(reg, addr & ~0xfff); // high part of the address; the low 12 bits become the load displacement
+  ldr(reg, Address(reg, addr & 0xfff));
+#endif
+}
+
+void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
+#ifdef AARCH64
+  intptr_t addr = (intptr_t) address_of_global;
+  assert ((addr & 0x7) == 0, "address should be aligned");
+  mov_slow(reg, addr & ~0x7fff); // high part of the address; the low 15 bits become the load displacement
+  ldr(reg, Address(reg, addr & 0x7fff));
+#else
+  ldr_global_s32(reg, address_of_global);
+#endif
+}
+
+void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
+  intptr_t addr = (intptr_t) address_of_global;
+  mov_slow(reg, addr & ~0xfff);
+  ldrb(reg, Address(reg, addr & 0xfff));
+}
+
+void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+  switch (bits) {
+  case 8: uxtb(rd, rn); break;
+  case 16: uxth(rd, rn); break;
+  case 32: mov_w(rd, rn); break;
+  default: ShouldNotReachHere();
+  }
+#else
+  if (bits <= 8) {
+    andr(rd, rn, (1 << bits) - 1);
+  } else if (bits >= 24) {
+    bic(rd, rn, -1 << bits); // NOTE(review): left-shifts a negative int; confirm the build compilers handle this as intended
+  } else {
+    mov(rd, AsmOperand(rn, lsl, 32 - bits));
+    mov(rd, AsmOperand(rd, lsr, 32 - bits));
+  }
+#endif
+}
+
+void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+  switch (bits) {
+  case 8: sxtb(rd, rn); break;
+  case 16: sxth(rd, rn); break;
+  case 32: sxtw(rd, rn); break;
+  default: ShouldNotReachHere();
+  }
+#else
+  mov(rd, AsmOperand(rn, lsl, 32 - bits)); // shift left, then arithmetic shift right by the same amount
+  mov(rd, AsmOperand(rd, asr, 32 - bits));
+#endif
+}
+
+#ifndef AARCH64
+
+void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
+                               Register rn_lo, Register rn_hi,
+                               AsmCondition cond) {
+  if (rd_lo != rn_hi) {
+    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+  } else if (rd_hi != rn_lo) {
+    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+  } else { // rd_lo == rn_hi and rd_hi == rn_lo: exchange the two registers with three eors
+    eor(rd_lo, rd_hi, rd_lo, cond);
+    eor(rd_hi, rd_lo, rd_hi, cond);
+    eor(rd_lo, rd_hi, rd_lo, cond);
+  }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+                                Register rn_lo, Register rn_hi,
+                                AsmShift shift, Register count) {
+  Register tmp; // one of the destination registers doubles as the temporary
+  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
+    tmp = rd_lo;
+  } else {
+    tmp = rd_hi;
+  }
+  assert_different_registers(tmp, count, rn_lo, rn_hi);
+
+  subs(tmp, count, 32); // the pl-conditioned instructions handle count >= 32, the mi-conditioned ones count < 32
+  if (shift == lsl) {
+    assert_different_registers(rd_hi, rn_lo);
+    assert_different_registers(count, rd_hi);
+    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
+    rsb(tmp, count, 32, mi);
+    if (rd_hi == rn_hi) {
+      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+    } else {
+      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+    }
+    mov(rd_lo, AsmOperand(rn_lo, shift, count));
+  } else {
+    assert_different_registers(rd_lo, rn_hi);
+    assert_different_registers(rd_lo, count);
+    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
+    rsb(tmp, count, 32, mi);
+    if (rd_lo == rn_lo) {
+      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+    } else {
+      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+    }
+    mov(rd_hi, AsmOperand(rn_hi, shift, count));
+  }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+                                Register rn_lo, Register rn_hi,
+                                AsmShift shift, int count) {
+  assert(count != 0 && (count & ~63) == 0, "must be"); // count must be in 1..63
+
+  if (shift == lsl) {
+    assert_different_registers(rd_hi, rn_lo);
+    if (count >= 32) {
+      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
+      mov(rd_lo, 0);
+    } else {
+      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
+      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
+      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
+    }
+  } else {
+    assert_different_registers(rd_lo, rn_hi);
+    if (count >= 32) {
+      if (count == 32) {
+        mov(rd_lo, rn_hi);
+      } else {
+        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
+      }
+      if (shift == asr) {
+        mov(rd_hi, AsmOperand(rn_hi, asr, 0)); // NOTE(review): presumably relies on an 'asr #0' operand meaning a full sign fill - confirm against AsmOperand
+      } else {
+        mov(rd_hi, 0);
+      }
+    } else {
+      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
+      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
+      mov(rd_hi, AsmOperand(rn_hi, shift, count));
+    }
+  }
+}
+#endif // !AARCH64
+
+void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
+  // This code pattern is matched in NativeInstruction::skip_verify_oop.
+  // Update it at modifications.
+  if (!VerifyOops) return;
+
+  char buffer[64]; // only written when COMPILER1 is defined
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+    block_comment(buffer);
+  }
+#endif
+  const char* msg_buffer = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
+    msg_buffer = code_string(ss.as_string());
+  }
+
+  save_all_registers();
+
+  if (reg != R2) {
+    mov(R2, reg); // oop to verify
+  }
+  mov(R1, SP); // register save area
+
+  Label done;
+  InlinedString Lmsg(msg_buffer);
+  ldr_literal(R0, Lmsg); // message
+
+  // call indirectly to solve generation ordering problem
+  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+  call(Rtemp);
+
+  restore_all_registers();
+
+  b(done); // jump over the inlined message literal
+#ifdef COMPILER2
+  int off = offset();
+#endif
+  bind_literal(Lmsg);
+#ifdef COMPILER2
+  if (offset() - off == 1 * wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+  bind(done);
+}
+
+void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
+  if (!VerifyOops) return;
+
+  const char* msg_buffer = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    if ((addr.base() == SP) && (addr.index()==noreg)) {
+      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
+    } else {
+      ss.print("verify_oop_addr: %s", s);
+    }
+    ss.print(" (%s:%d)", file, line);
+    msg_buffer = code_string(ss.as_string());
+  }
+
+  int push_size = save_all_registers();
+
+  if (addr.base() == SP) {
+    // computes an addr that takes into account the push
+    if (addr.index() != noreg) {
+      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
+      add(new_base, SP, push_size);
+      addr = addr.rebase(new_base);
+    } else {
+      addr = addr.plus_disp(push_size);
+    }
+  }
+
+  ldr(R2, addr); // oop to verify
+  mov(R1, SP); // register save area
+
+  Label done;
+  InlinedString Lmsg(msg_buffer);
+  ldr_literal(R0, Lmsg); // message
+
+  // call indirectly to solve generation ordering problem
+  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+  call(Rtemp);
+
+  restore_all_registers();
+
+  b(done); // jump over the inlined message literal
+  bind_literal(Lmsg);
+  bind(done);
+}
+
+void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
+  if (needs_explicit_null_check(offset)) { // nothing is emitted otherwise
+#ifdef AARCH64
+    ldr(ZR, Address(reg));
+#else
+    assert_different_registers(reg, tmp);
+    if (tmp == noreg) {
+      tmp = Rtemp;
+      assert((! Thread::current()->is_Compiler_thread()) ||
+             (! (ciEnv::current()->task() == NULL)) ||
+             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
+             "Rtemp not available in C2"); // explicit tmp register required
+      // XXX: could we mark the code buffer as not compatible with C2 ?
+    }
+    ldr(tmp, Address(reg));
+#endif
+  }
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, + RegisterOrConstant size_expression, Label& slow_case) { + if (!Universe::heap()->supports_inline_contig_alloc()) { + b(slow_case); + return; + } + + CollectedHeap* ch = Universe::heap(); + + const Register top_addr = tmp1; + const Register heap_end = tmp2; + + if (size_expression.is_register()) { + assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); + } else { + assert_different_registers(obj, obj_end, top_addr, heap_end); + } + + bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance + if (load_const) { + mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); + } else { + ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); + } + // Calculate new heap_top by adding the size of the object + Label retry; + bind(retry); + +#ifdef AARCH64 + ldxr(obj, top_addr); +#else + ldr(obj, Address(top_addr)); +#endif // AARCH64 + + ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); + add_rc(obj_end, obj, size_expression); + // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. + cmp(obj_end, obj); + b(slow_case, lo); + // Update heap_top if allocation succeeded + cmp(obj_end, heap_end); + b(slow_case, hi); + +#ifdef AARCH64 + stxr(heap_end/*scratched*/, obj_end, top_addr); + cbnz_w(heap_end, retry); +#else + atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); + b(retry, ne); +#endif // AARCH64 +} + +// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
+void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, + RegisterOrConstant size_expression, Label& slow_case) { + const Register tlab_end = tmp1; + assert_different_registers(obj, obj_end, tlab_end); + + ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); + ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); + add_rc(obj_end, obj, size_expression); + cmp(obj_end, tlab_end); + b(slow_case, hi); + str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); +} + +void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Label& try_eden, Label& slow_case) { + if (!Universe::heap()->supports_inline_contig_alloc()) { + b(slow_case); + return; + } + + InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr()); + Label discard_tlab, do_refill; + ldr(top, Address(Rthread, JavaThread::tlab_top_offset())); + ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); + ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); + + // Calculate amount of free space + sub(tmp1, tmp1, top); + // Retain tlab and allocate in shared space + // if the amount of free space in tlab is too large to discard + cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize)); + b(discard_tlab, ge); + + // Increment waste limit to prevent getting stuck on this slow path + mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment()); + add(tmp2, tmp2, tmp3); + str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); + if (TLABStats) { + ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); + add_32(tmp2, tmp2, 1); + str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); + } + b(try_eden); + bind_literal(intArrayKlass_addr); + + bind(discard_tlab); + if (TLABStats) { + ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); + ldr_u32(tmp3, Address(Rthread, 
JavaThread::tlab_fast_refill_waste_offset())); + add_32(tmp2, tmp2, 1); + add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize)); + str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); + str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset())); + } + // If tlab is currently allocated (top or end != null) + // then fill [top, end + alignment_reserve) with array object + cbz(top, do_refill); + + // Set up the mark word + mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); + str(tmp2, Address(top, oopDesc::mark_offset_in_bytes())); + // Set klass to intArrayKlass and the length to the remaining space + ldr_literal(tmp2, intArrayKlass_addr); + add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() - + typeArrayOopDesc::header_size(T_INT) * HeapWordSize); + Register klass = tmp2; + ldr(klass, Address(tmp2)); + logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint) + str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes())); + store_klass(klass, top); // blows klass: + klass = noreg; + + ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset())); + sub(tmp1, top, tmp1); // size of tlab's allocated portion + incr_allocated_bytes(tmp1, tmp2); + + bind(do_refill); + // Refill the tlab with an eden allocation + ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset())); + logical_shift_left(tmp4, tmp1, LogHeapWordSize); + eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case); + str(top, Address(Rthread, JavaThread::tlab_start_offset())); + str(top, Address(Rthread, JavaThread::tlab_top_offset())); + +#ifdef ASSERT + // Verify that tmp1 contains tlab_end + ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset())); + add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize)); + cmp(tmp1, tmp2); + breakpoint(ne); +#endif + + sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); + str(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); + + if 
(ZeroTLAB) { + // clobbers start and tmp + // top must be preserved! + add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); + ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset())); + zero_memory(tmp2, tmp1, tmp3); + } +} + +// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. +void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { + Label loop; + const Register ptr = start; + +#ifdef AARCH64 + // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x + const Register size = tmp; + Label remaining, done; + + sub(size, end, start); + +#ifdef ASSERT + { Label L; + tst(size, wordSize - 1); + b(L, eq); + stop("size is not a multiple of wordSize"); + bind(L); + } +#endif // ASSERT + + subs(size, size, wordSize); + b(remaining, le); + + // Zero by 2 words per iteration. + bind(loop); + subs(size, size, 2*wordSize); + stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); + b(loop, gt); + + bind(remaining); + b(done, ne); + str(ZR, Address(ptr)); + bind(done); +#else + mov(tmp, 0); + bind(loop); + cmp(ptr, end); + str(tmp, Address(ptr, wordSize, post_indexed), lo); + b(loop, lo); +#endif // AARCH64 +} + +void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { +#ifdef AARCH64 + ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); + add_rc(tmp, tmp, size_in_bytes); + str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); +#else + // Bump total bytes allocated by this thread + Label done; + + ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); + adds(tmp, tmp, size_in_bytes); + str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc); + b(done, cc); + + // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated) + // To do so ldrd/strd instructions used which 
require an even-odd pair of registers. Such a request could be difficult to satisfy by + // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself. + Register low, high; + // Select ether R0/R1 or R2/R3 + + if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { + low = R2; + high = R3; + } else { + low = R0; + high = R1; + } + push(RegisterSet(low, high)); + + ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); + adds(low, low, size_in_bytes); + adc(high, high, 0); + strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); + + pop(RegisterSet(low, high)); + + bind(done); +#endif // AARCH64 +} + +void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { + // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM + if (UseStackBanging) { + const int page_size = os::vm_page_size(); + + sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); + strb(R0, Address(tmp)); +#ifdef AARCH64 + for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { + sub(tmp, tmp, page_size); + strb(R0, Address(tmp)); + } +#else + for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { + strb(R0, Address(tmp, -0xff0, pre_indexed)); + } +#endif // AARCH64 + } +} + +void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) { + if (UseStackBanging) { + Label loop; + + mov(tmp, SP); + add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); +#ifdef AARCH64 + sub(tmp, tmp, Rsize); + bind(loop); + subs(Rsize, Rsize, os::vm_page_size()); + strb(ZR, Address(tmp, Rsize)); +#else + bind(loop); + subs(Rsize, Rsize, 0xff0); + strb(R0, Address(tmp, -0xff0, pre_indexed)); +#endif // AARCH64 + b(loop, hi); + } +} + +void MacroAssembler::stop(const char* msg) { + // This code pattern is matched in NativeIntruction::is_stop. 
+ // Update it at modifications. +#ifdef COMPILER1 + if (CommentedAssembly) { + block_comment("stop"); + } +#endif + + InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); + InlinedString Lmsg(msg); + + // save all registers for further inspection + save_all_registers(); + + ldr_literal(R0, Lmsg); // message + mov(R1, SP); // register save area + +#ifdef AARCH64 + ldr_literal(Rtemp, Ldebug); + br(Rtemp); +#else + ldr_literal(PC, Ldebug); // call MacroAssembler::debug +#endif // AARCH64 + +#if defined(COMPILER2) && defined(AARCH64) + int off = offset(); +#endif + bind_literal(Lmsg); + bind_literal(Ldebug); +#if defined(COMPILER2) && defined(AARCH64) + if (offset() - off == 2 * wordSize) { + // no padding, so insert nop for worst-case sizing + nop(); + } +#endif +} + +void MacroAssembler::warn(const char* msg) { +#ifdef COMPILER1 + if (CommentedAssembly) { + block_comment("warn"); + } +#endif + + InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning)); + InlinedString Lmsg(msg); + Label done; + + int push_size = save_caller_save_registers(); + +#ifdef AARCH64 + // TODO-AARCH64 - get rid of extra debug parameters + mov(R1, LR); + mov(R2, FP); + add(R3, SP, push_size); +#endif + + ldr_literal(R0, Lmsg); // message + ldr_literal(LR, Lwarn); // call warning + + call(LR); + + restore_caller_save_registers(); + + b(done); + bind_literal(Lmsg); + bind_literal(Lwarn); + bind(done); +} + + +int MacroAssembler::save_all_registers() { + // This code pattern is matched in NativeIntruction::is_save_all_registers. + // Update it at modifications. 
+#ifdef AARCH64 + const Register tmp = Rtemp; + raw_push(R30, ZR); + for (int i = 28; i >= 0; i -= 2) { + raw_push(as_Register(i), as_Register(i+1)); + } + mov_pc_to(tmp); + str(tmp, Address(SP, 31*wordSize)); + ldr(tmp, Address(SP, tmp->encoding()*wordSize)); + return 32*wordSize; +#else + push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); + return 15*wordSize; +#endif // AARCH64 +} + +void MacroAssembler::restore_all_registers() { +#ifdef AARCH64 + for (int i = 0; i <= 28; i += 2) { + raw_pop(as_Register(i), as_Register(i+1)); + } + raw_pop(R30, ZR); +#else + pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers + add(SP, SP, wordSize); // discard saved PC +#endif // AARCH64 +} + +int MacroAssembler::save_caller_save_registers() { +#ifdef AARCH64 + for (int i = 0; i <= 16; i += 2) { + raw_push(as_Register(i), as_Register(i+1)); + } + raw_push(R18, LR); + return 20*wordSize; +#else +#if R9_IS_SCRATCHED + // Save also R10 to preserve alignment + push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); + return 8*wordSize; +#else + push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); + return 6*wordSize; +#endif +#endif // AARCH64 +} + +void MacroAssembler::restore_caller_save_registers() { +#ifdef AARCH64 + raw_pop(R18, LR); + for (int i = 16; i >= 0; i -= 2) { + raw_pop(as_Register(i), as_Register(i+1)); + } +#else +#if R9_IS_SCRATCHED + pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); +#else + pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); +#endif +#endif // AARCH64 +} + +void MacroAssembler::debug(const char* msg, const intx* registers) { + // In order to get locks to work, we need to fake a in_VM state + JavaThread* thread = JavaThread::current(); + thread->set_thread_state(_thread_in_vm); + + if (ShowMessageBoxOnError) { + ttyLocker ttyl; + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + if 
(os::message_box(msg, "Execution stopped, print registers?")) { +#ifdef AARCH64 + // saved registers: R0-R30, PC + const int nregs = 32; +#else + // saved registers: R0-R12, LR, PC + const int nregs = 15; + const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; +#endif // AARCH64 + + for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { + tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); + } + +#ifdef AARCH64 + tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); +#endif // AARCH64 + + // derive original SP value from the address of register save area + tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); + } + BREAKPOINT; + } else { + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + } + assert(false, "DEBUG MESSAGE: %s", msg); + fatal("%s", msg); // returning from MacroAssembler::debug is not supported +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + + +// Implementation of FixedSizeCodeBlock + +FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) : +_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) { +} + +FixedSizeCodeBlock::~FixedSizeCodeBlock() { + if (_enabled) { + address curr_pc = _masm->pc(); + + assert(_start < curr_pc, "invalid current pc"); + guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long"); + + int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs; + for (int i = 0; i < nops_count; i++) { + _masm->nop(); + } + } +} + +#ifdef AARCH64 + +// Serializes memory. 
+// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM +void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { + if (!os::is_MP()) return; + + // TODO-AARCH64 investigate dsb vs dmb effects + if (order_constraint == StoreStore) { + dmb(DMB_st); + } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { + dmb(DMB_ld); + } else { + dmb(DMB_all); + } +} + +#else + +// Serializes memory. Potentially blows flags and reg. +// tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions) +// preserve_flags takes a longer path in LoadStore case (dmb rather then control dependency) to preserve status flags. Optional. +// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional. +void MacroAssembler::membar(Membar_mask_bits order_constraint, + Register tmp, + bool preserve_flags, + Register load_tgt) { + if (!os::is_MP()) return; + + if (order_constraint == StoreStore) { + dmb(DMB_st, tmp); + } else if ((order_constraint & StoreLoad) || + (order_constraint & LoadLoad) || + (order_constraint & StoreStore) || + (load_tgt == noreg) || + preserve_flags) { + dmb(DMB_all, tmp); + } else { + // LoadStore: speculative stores reordeing is prohibited + + // By providing an ordered load target register, we avoid an extra memory load reference + Label not_taken; + bind(not_taken); + cmp(load_tgt, load_tgt); + b(not_taken, ne); + } +} + +#endif // AARCH64 + +// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" +// on failure, so fall-through can only mean success. +// "one_shot" controls whether we loop and retry to mitigate spurious failures. +// This is only needed for C2, which for some reason does not rety, +// while C1/interpreter does. 
+// TODO: measure if it makes a difference + +void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, + Register base, Register tmp, Label &slow_case, + bool allow_fallthrough_on_failure, bool one_shot) +{ + + bool fallthrough_is_success = false; + + // ARM Litmus Test example does prefetching here. + // TODO: investigate if it helps performance + + // The last store was to the displaced header, so to prevent + // reordering we must issue a StoreStore or Release barrier before + // the CAS store. + +#ifdef AARCH64 + + Register Rscratch = tmp; + Register Roop = base; + Register mark = oldval; + Register Rbox = newval; + Label loop; + + assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); + + // Instead of StoreStore here, we use store-release-exclusive below + + bind(loop); + + ldaxr(tmp, base); // acquire + cmp(tmp, oldval); + b(slow_case, ne); + stlxr(tmp, newval, base); // release + if (one_shot) { + cmp_w(tmp, 0); + } else { + cbnz_w(tmp, loop); + fallthrough_is_success = true; + } + + // MemBarAcquireLock would normally go here, but + // we already do ldaxr+stlxr above, which has + // Sequential Consistency + +#else + membar(MacroAssembler::StoreStore, noreg); + + if (one_shot) { + ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); + cmp(tmp, oldval); + strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); + cmp(tmp, 0, eq); + } else { + atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); + } + + // MemBarAcquireLock barrier + // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, + // but that doesn't prevent a load or store from floating up between + // the load and store in the CAS sequence, so play it safe and + // do a full fence. 
+ membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); +#endif + if (!fallthrough_is_success && !allow_fallthrough_on_failure) { + b(slow_case, ne); + } +} + +void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, + Register base, Register tmp, Label &slow_case, + bool allow_fallthrough_on_failure, bool one_shot) +{ + + bool fallthrough_is_success = false; + + assert_different_registers(oldval,newval,base,tmp); + +#ifdef AARCH64 + Label loop; + + assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); + + bind(loop); + ldxr(tmp, base); + cmp(tmp, oldval); + b(slow_case, ne); + // MemBarReleaseLock barrier + stlxr(tmp, newval, base); + if (one_shot) { + cmp_w(tmp, 0); + } else { + cbnz_w(tmp, loop); + fallthrough_is_success = true; + } +#else + // MemBarReleaseLock barrier + // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, + // but that doesn't prevent a load or store from floating down between + // the load and store in the CAS sequence, so play it safe and + // do a full fence. + membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp); + + if (one_shot) { + ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); + cmp(tmp, oldval); + strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); + cmp(tmp, 0, eq); + } else { + atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); + } +#endif + if (!fallthrough_is_success && !allow_fallthrough_on_failure) { + b(slow_case, ne); + } + + // ExitEnter + // According to JSR-133 Cookbook, this should be StoreLoad, the same + // barrier that follows volatile store. + // TODO: Should be able to remove on armv8 if volatile loads + // use the load-acquire instruction. + membar(StoreLoad, noreg); +} + +#ifndef PRODUCT + +// Preserves flags and all registers. 
+// On SMP the updated value might not be visible to external observers without a sychronization barrier +void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) { + if (counter_addr != NULL) { + InlinedAddress counter_addr_literal((address)counter_addr); + Label done, retry; + if (cond != al) { + b(done, inverse(cond)); + } + +#ifdef AARCH64 + raw_push(R0, R1); + raw_push(R2, ZR); + + ldr_literal(R0, counter_addr_literal); + + bind(retry); + ldxr_w(R1, R0); + add_w(R1, R1, 1); + stxr_w(R2, R1, R0); + cbnz_w(R2, retry); + + raw_pop(R2, ZR); + raw_pop(R0, R1); +#else + push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); + ldr_literal(R0, counter_addr_literal); + + mrs(CPSR, Rtemp); + + bind(retry); + ldr_s32(R1, Address(R0)); + add(R2, R1, 1); + atomic_cas_bool(R1, R2, R0, 0, R3); + b(retry, ne); + + msr(CPSR_fsxc, Rtemp); + + pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); +#endif // AARCH64 + + b(done); + bind_literal(counter_addr_literal); + + bind(done); + } +} + +#endif // !PRODUCT + + +// Building block for CAS cases of biased locking: makes CAS and records statistics. +// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. 
+void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, + Register tmp, Label& slow_case, int* counter_addr) { + + cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case); +#ifdef ASSERT + breakpoint(ne); // Fallthrough only on success +#endif +#ifndef PRODUCT + if (counter_addr != NULL) { + cond_atomic_inc32(al, counter_addr); + } +#endif // !PRODUCT +} + +int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Register tmp2, + Label& done, Label& slow_case, + BiasedLockingCounters* counters) { + // obj_reg must be preserved (at least) if the bias locking fails + // tmp_reg is a temporary register + // swap_reg was used as a temporary but contained a value + // that was used afterwards in some call pathes. Callers + // have been fixed so that swap_reg no longer needs to be + // saved. + // Rtemp in no longer scratched + + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2); + guarantee(swap_reg!=tmp_reg, "invariant"); + assert(tmp_reg != noreg, "must supply tmp_reg"); + +#ifndef PRODUCT + if (PrintBiasedLockingStatistics && (counters == NULL)) { + counters = BiasedLocking::counters(); + } +#endif + + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + + // The null check applies to the mark loading, if we need to load it. 
+ // If the mark has already been loaded in swap_reg then it has already + // been performed and the offset is irrelevant. + int null_check_offset = offset(); + if (!swap_reg_contains_mark) { + ldr(swap_reg, mark_addr); + } + + // On MP platform loads could return 'stale' values in some cases. + // That is acceptable since either CAS or slow case path is taken in the worst case. + + andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place); + cmp(tmp_reg, markOopDesc::biased_lock_pattern); + + b(cas_label, ne); + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_klass(tmp_reg, obj_reg); + ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); + orr(tmp_reg, tmp_reg, Rthread); + eor(tmp_reg, tmp_reg, swap_reg); + +#ifdef AARCH64 + ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); +#else + bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); +#endif // AARCH64 + +#ifndef PRODUCT + if (counters != NULL) { + cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); + } +#endif // !PRODUCT + + b(done, eq); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); + b(try_revoke_bias, ne); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. 
(Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); + b(try_rebias, ne); + + // tmp_reg has the age, epoch and pattern bits cleared + // The remaining (owner) bits are (Thread ^ current_owner) + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + // Note that we know the owner is not ourself. Hence, success can + // only happen when the owner bits is 0 + +#ifdef AARCH64 + // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has + // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
+ mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); + andr(swap_reg, swap_reg, tmp2); +#else + // until the assembler can be made smarter, we need to make some assumptions about the values + // so we can optimize this: + assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); + + mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); + mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) +#endif // AARCH64 + + orr(tmp_reg, swap_reg, Rthread); // new mark + + biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, + (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + + b(done); + + bind(try_rebias); + + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + + // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) + + eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) + + // owner bits 'random'. Set them to Rthread. 
+#ifdef AARCH64 + mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); + andr(tmp_reg, tmp_reg, tmp2); +#else + mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); + mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); +#endif // AARCH64 + + orr(tmp_reg, tmp_reg, Rthread); // new mark + + biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, + (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); + + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + + b(done); + + bind(try_revoke_bias); + + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + + // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) + + eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) + + // owner bits 'random'. Clear them +#ifdef AARCH64 + mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); + andr(tmp_reg, tmp_reg, tmp2); +#else + mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); + mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); +#endif // AARCH64 + + biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, + (counters != NULL) ? 
counters->revoked_lock_entry_count_addr() : NULL); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + + bind(cas_label); + + return null_check_offset; +} + + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + + andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); + cmp(tmp_reg, markOopDesc::biased_lock_pattern); + b(done, eq); +} + +#ifdef AARCH64 + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { + switch (size_in_bytes) { + case 8: ldr(dst, src); break; + case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; + case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; + case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { + switch (size_in_bytes) { + case 8: str(src, dst); break; + case 4: str_32(src, dst); break; + case 2: strh(src, dst); break; + case 1: strb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +#else + +void MacroAssembler::load_sized_value(Register dst, Address src, + size_t size_in_bytes, bool is_signed, AsmCondition cond) { + switch (size_in_bytes) { + case 4: ldr(dst, src, cond); break; + case 2: is_signed ? 
ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; + case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; + default: ShouldNotReachHere(); + } +} + + +void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { + switch (size_in_bytes) { + case 4: str(src, dst, cond); break; + case 2: strh(src, dst, cond); break; + case 1: strb(src, dst, cond); break; + default: ShouldNotReachHere(); + } +} +#endif // AARCH64 + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in Rklass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register Rklass, + Register Rinterf, + Register Rindex, + Register method_result, + Register temp_reg1, + Register temp_reg2, + Label& L_no_such_interface) { + + assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex); + + Register Ritable = temp_reg1; + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + const int base = in_bytes(Klass::vtable_start_offset()); + const int scale = exact_log2(vtableEntry::size_in_bytes()); + ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable + add(Ritable, Rklass, base); + add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale)); + + Label entry, search; + + b(entry); + + bind(search); + add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize); + + bind(entry); + + // Check that the entry is non-null. A null entry means that the receiver + // class doesn't implement the interface, and wasn't the same as the + // receiver class checked when the interface was resolved. 
+ + ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes())); + cbz(temp_reg2, L_no_such_interface); + + cmp(Rinterf, temp_reg2); + b(search, ne); + + ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes())); + add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass* + assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); + assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); + + ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex)); +} + +#ifdef COMPILER2 +// TODO: 8 bytes at a time? pre-fetch? +// Compare char[] arrays aligned to 4 bytes. +void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, + Register limit, Register result, + Register chr1, Register chr2, Label& Ldone) { + Label Lvector, Lloop; + + // Note: limit contains number of bytes (2*char_elements) != 0. + tst(limit, 0x2); // trailing character ? + b(Lvector, eq); + + // compare the trailing char + sub(limit, limit, sizeof(jchar)); + ldrh(chr1, Address(ary1, limit)); + ldrh(chr2, Address(ary2, limit)); + cmp(chr1, chr2); + mov(result, 0, ne); // not equal + b(Ldone, ne); + + // only one char ? 
+ tst(limit, limit); + mov(result, 1, eq); + b(Ldone, eq); + + // word by word compare, dont't need alignment check + bind(Lvector); + + // Shift ary1 and ary2 to the end of the arrays, negate limit + add(ary1, limit, ary1); + add(ary2, limit, ary2); + neg(limit, limit); + + bind(Lloop); + ldr_u32(chr1, Address(ary1, limit)); + ldr_u32(chr2, Address(ary2, limit)); + cmp_32(chr1, chr2); + mov(result, 0, ne); // not equal + b(Ldone, ne); + adds(limit, limit, 2*sizeof(jchar)); + b(Lloop, ne); + + // Caller should set it: + // mov(result_reg, 1); //equal +} +#endif + +void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { + mov_slow(tmpreg1, counter_addr); + ldr_s32(tmpreg2, tmpreg1); + add_32(tmpreg2, tmpreg2, 1); + str_32(tmpreg2, tmpreg1); +} + +void MacroAssembler::floating_cmp(Register dst) { +#ifdef AARCH64 + NOT_TESTED(); + cset(dst, gt); // 1 if '>', else 0 + csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 +#else + vmrs(dst, FPSCR); + orr(dst, dst, 0x08000000); + eor(dst, dst, AsmOperand(dst, lsl, 3)); + mov(dst, AsmOperand(dst, asr, 30)); +#endif +} + +void MacroAssembler::restore_default_fp_mode() { +#ifdef AARCH64 + msr(SysReg_FPCR, ZR); +#else +#ifndef __SOFTFP__ + // Round to Near mode, IEEE compatible, masked exceptions + mov(Rtemp, 0); + vmsr(FPSCR, Rtemp); +#endif // !__SOFTFP__ +#endif // AARCH64 +} + +#ifndef AARCH64 +// 24-bit word range == 26-bit byte range +bool check26(int offset) { + // this could be simplified, but it mimics encoding and decoding + // an actual branch insrtuction + int off1 = offset << 6 >> 8; + int encoded = off1 & ((1<<24)-1); + int decoded = encoded << 8 >> 6; + return offset == decoded; +} +#endif // !AARCH64 + +// Perform some slight adjustments so the default 32MB code cache +// is fully reachable. 
+static inline address first_cache_address() {
+  // Lowest address used as a branch origin by the reachability tests below:
+  // the code cache low bound advanced by the size of one HeapBlock::Header.
+  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
+}
+static inline address last_cache_address() {
+  // Highest address used as a branch origin by the reachability tests below:
+  // one Assembler::InstructionSize below the code cache high bound.
+  return CodeCache::high_bound() - Assembler::InstructionSize;
+}
+
+#ifdef AARCH64
+// Can we reach target using ADRP?
+// Returns true only if the page of 'target' is in range from BOTH ends of
+// the code cache (first_cache_address() and last_cache_address()).
+bool MacroAssembler::page_reachable_from_cache(address target) {
+  // Clear the low 12 bits so that all three values are 4KB page addresses.
+  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
+  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
+  intptr_t addr = (intptr_t)target & ~0xfff;
+
+  intptr_t loffset = addr - cl;
+  intptr_t hoffset = addr - ch;
+  // The distances are checked as page counts (byte offset >> 12).
+  // NOTE(review): (21, 0) presumably describes ADRP's 21-bit signed page
+  // immediate - confirm against is_imm_in_range().
+  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
+}
+#endif
+
+// Can we reach target using unconditional branch or call from anywhere
+// in the code cache (because code can be relocated)?
+// The answer is computed from the two extreme branch origins
+// (first_cache_address() and last_cache_address()); both must be in range.
+bool MacroAssembler::_reachable_from_cache(address target) {
+#ifdef __thumb__
+  if ((1 & (intptr_t)target) != 0) {
+    // Return false to avoid 'b' if we need switching to THUMB mode.
+    return false;
+  }
+#endif
+
+  address cl = first_cache_address();
+  address ch = last_cache_address();
+
+  if (ForceUnreachable) {
+    // Only addresses from CodeCache can be treated as reachable.
+    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
+      return false;
+    }
+  }
+
+  // Signed byte distances from the lowest and highest possible branch origin.
+  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
+  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
+
+#ifdef AARCH64
+  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
+#else
+  // NOTE(review): the -8 presumably accounts for the ARM PC reading 8 bytes
+  // ahead of the branch instruction - confirm against the branch encoder.
+  return check26(loffset - 8) && check26(hoffset - 8);
+#endif
+}
+
+// Checked entry point for _reachable_from_cache(): asserts that the current
+// emission position pc() lies inside the code cache.
+bool MacroAssembler::reachable_from_cache(address target) {
+  assert(CodeCache::contains(pc()), "not supported");
+  return _reachable_from_cache(target);
+}
+
+// Can we reach the entire code cache from anywhere else in the code cache?
+bool MacroAssembler::_cache_fully_reachable() {
+  // Test both extreme addresses of the cache as branch targets; each call
+  // to _reachable_from_cache() in turn checks both extremes as origins.
+  address cl = first_cache_address();
+  address ch = last_cache_address();
+  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
+}
+
+// Checked entry point for _cache_fully_reachable(): asserts that the current
+// emission position pc() lies inside the code cache.
+bool MacroAssembler::cache_fully_reachable() {
+  assert(CodeCache::contains(pc()), "not supported");
+  return _cache_fully_reachable();
+}
+
+// Emit a jump to 'target'.
+//   rtype   - must be runtime_call_type or none (asserted).
+//   scratch - register that receives the absolute target address in the far
+//             forms; required (asserted) on AArch64, optional on 32-bit ARM.
+//   cond    - (32-bit ARM only) condition under which the jump is taken.
+// If 'target' is reachable from anywhere in the code cache a single relocated
+// 'b' is emitted; otherwise one of the absolute-address sequences below.
+void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+  if (reachable_from_cache(target)) {
+    relocate(rtype);
+    b(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Note: relocate is not needed for the code below,
+  // encoding targets in absolute format.
+  if (ignore_non_patchable_relocations()) {
+    rtype = relocInfo::none;
+  }
+
+#ifdef AARCH64
+  // Far form: load the target from an inlined literal and branch via scratch.
+  assert (scratch != noreg, "should be specified");
+  InlinedAddress address_literal(target, rtype);
+  ldr_literal(scratch, address_literal);
+  br(scratch);
+  int off = offset();
+  bind_literal(address_literal);
+#ifdef COMPILER2
+  // If binding the literal advanced the offset by exactly one word, pad with
+  // a nop so that the emitted size matches the worst case.
+  if (offset() - off == wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+#else
+  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
+    // Note: this version cannot be (atomically) patched
+    mov_slow(scratch, (intptr_t)target, cond);
+    bx(scratch, cond);
+  } else {
+    // Load the target from an inlined literal straight into PC. For a
+    // conditional jump, branch around the load and the literal on the
+    // inverse condition.
+    Label skip;
+    InlinedAddress address_literal(target);
+    if (cond != al) {
+      b(skip, inverse(cond));
+    }
+    relocate(rtype);
+    ldr_literal(PC, address_literal);
+    bind_literal(address_literal);
+    bind(skip);
+  }
+#endif // AARCH64
+}
+
+// Similar to jump except that:
+// - near calls are valid only if any destination in the cache is near
+// - no movt/movw (not atomically patchable)
+// Parameters have the same meaning as for jump(); the near form additionally
+// asserts that 'target' is inside the code cache.
+void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+  if (cache_fully_reachable()) {
+    // Note: this assumes that all possible targets (the initial one
+    // and the addresses patched to) are all in the code cache.
+    assert(CodeCache::contains(target), "target might be too far");
+    relocate(rtype);
+    b(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Discard the relocation information if not needed for CacheCompiledCode
+  // since the next encodings are all in absolute format.
+  if (ignore_non_patchable_relocations()) {
+    rtype = relocInfo::none;
+  }
+
+#ifdef AARCH64
+  // Far form: load the target from an inlined literal and branch via scratch.
+  assert (scratch != noreg, "should be specified");
+  InlinedAddress address_literal(target);
+  relocate(rtype);
+  ldr_literal(scratch, address_literal);
+  br(scratch);
+  int off = offset();
+  bind_literal(address_literal);
+#ifdef COMPILER2
+  if (offset() - off == wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+#else
+  {
+    // Load the target from an inlined literal straight into PC; a
+    // conditional jump skips the load and the literal on the inverse condition.
+    Label skip;
+    InlinedAddress address_literal(target);
+    if (cond != al) {
+      b(skip, inverse(cond));
+    }
+    relocate(rtype);
+    ldr_literal(PC, address_literal);
+    bind_literal(address_literal);
+    bind(skip);
+  }
+#endif // AARCH64
+}
+
+// Emit a call to 'target'.
+//   rspec - relocation; its type must be runtime_call_type or none (asserted).
+//   cond  - (32-bit ARM only) condition under which the call is made.
+// LR is used as the scratch register by the far forms.
+void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
+  Register scratch = LR;
+  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
+  if (reachable_from_cache(target)) {
+    relocate(rspec);
+    bl(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Note: relocate is not needed for the code below,
+  // encoding targets in absolute format.
+  if (ignore_non_patchable_relocations()) {
+    // This assumes the information was needed only for relocating the code.
+    rspec = RelocationHolder::none;
+  }
+
+#ifndef AARCH64
+  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
+    // Note: this version cannot be (atomically) patched
+    mov_slow(scratch, (intptr_t)target, cond);
+    blx(scratch, cond);
+    return;
+  }
+#endif
+
+  {
+    Label ret_addr;
+#ifndef AARCH64
+    // A conditional call branches to the return address on the inverse condition.
+    if (cond != al) {
+      b(ret_addr, inverse(cond));
+    }
+#endif
+
+
+#ifdef AARCH64
+    // TODO-AARCH64: make more optimal implementation
+    // [ Keep in sync with MacroAssembler::call_size ]
+    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
+    mov_slow(scratch, target);
+    blr(scratch);
+#else
+    // Set LR to the return address by hand, then load the target from an
+    // inlined literal straight into PC.
+    InlinedAddress address_literal(target);
+    relocate(rspec);
+    adr(LR, ret_addr);
+    ldr_literal(PC, address_literal);
+
+    bind_literal(address_literal);
+    bind(ret_addr);
+#endif
+  }
+}
+
+#if defined(AARCH64) && defined(COMPILER2)
+// Number of instructions up to and including the call instruction:
+//   near call      -> 1
+//   far, patchable -> 2
+//   far otherwise  -> instr_count_for_mov_slow(target) + 1
+int MacroAssembler::call_size(address target, bool far, bool patchable) {
+  // FIXME: mov_slow is variable-length
+  if (!far) return 1; // bl
+  if (patchable) return 2; // ldr; blr
+  return instr_count_for_mov_slow((intptr_t)target) + 1;
+}
+#endif
+
+// Emit a call whose relocation is always recorded.
+//   rspec - relocation; its type must be static_call_type, none or
+//           opt_virtual_call_type (asserted).
+//   c2    - selects the AArch64/COMPILER2 far sequence whose size is checked
+//           against call_size(); only read when both AARCH64 and COMPILER2
+//           are defined.
+// Returns a code offset: the offset right after 'blr' in the c2 far sequence,
+// otherwise offset() after the whole sequence (ret_addr is bound there).
+int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
+  assert(rspec.type() == relocInfo::static_call_type ||
+         rspec.type() == relocInfo::none ||
+         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
+
+  // Always generate the relocation information, needed for patching
+  relocate(rspec); // used by NativeCall::is_call_before()
+  if (cache_fully_reachable()) {
+    // Note: this assumes that all possible targets (the initial one
+    // and the addresses patched to) are all in the code cache.
+    assert(CodeCache::contains(target), "target might be too far");
+    bl(target);
+  } else {
+#if defined(AARCH64) && defined(COMPILER2)
+    if (c2) {
+      // return address needs to match call_size().
+      // no need to trash Rtemp
+      int off = offset();
+      Label skip_literal;
+      InlinedAddress address_literal(target);
+      ldr_literal(LR, address_literal);
+      blr(LR);
+      int ret_addr_offset = offset();
+      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
+      // The literal is placed after the call, so execution returning from the
+      // callee must branch over it.
+      b(skip_literal);
+      int off2 = offset();
+      bind_literal(address_literal);
+      if (offset() - off2 == wordSize) {
+        // no padding, so insert nop for worst-case sizing
+        nop();
+      }
+      bind(skip_literal);
+      return ret_addr_offset;
+    }
+#endif
+    Label ret_addr;
+    InlinedAddress address_literal(target);
+#ifdef AARCH64
+    // Load the target into Rtemp, set LR to the return address by hand, then branch.
+    ldr_literal(Rtemp, address_literal);
+    adr(LR, ret_addr);
+    br(Rtemp);
+#else
+    // Set LR to the return address by hand, then load the target into PC.
+    adr(LR, ret_addr);
+    ldr_literal(PC, address_literal);
+#endif
+    bind_literal(address_literal);
+    bind(ret_addr);
+  }
+  return offset();
+}
+
+
+// Load into 'mirror' the java mirror of the holder class of 'method' by
+// following method -> ConstMethod -> ConstantPool -> pool holder -> mirror.
+// 'tmp' is overwritten with the intermediate pointers.
+void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
+  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+  ldr(tmp, Address(method, Method::const_offset()));
+  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
+  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
+  ldr(mirror, Address(tmp, mirror_offset));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Compressed pointers
+
+#ifdef AARCH64
+
+// Load the klass field of 'src_oop' into 'dst_klass'. With
+// UseCompressedClassPointers the field is read with ldr_w and then decoded
+// in place by decode_klass_not_null().
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
+  if (UseCompressedClassPointers) {
+    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+    decode_klass_not_null(dst_klass);
+  } else {
+    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
+#else
+
+// Load the klass field of 'src_oop' into 'dst_klass' with a single load
+// executed under condition 'cond'.
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
+  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
+}
+
+#endif // AARCH64
+
+// Blows src_klass.
+void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
+  // Store 'src_klass' into the klass field of 'dst_oop'. On AArch64 with
+  // UseCompressedClassPointers the register is encoded in place first
+  // (which is why src_klass must differ from dst_oop) and stored with str_w.
+#ifdef AARCH64
+  if (UseCompressedClassPointers) {
+    assert(src_klass != dst_oop, "not enough registers");
+    encode_klass_not_null(src_klass);
+    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+    return;
+  }
+#endif // AARCH64
+  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+}
+
+#ifdef AARCH64
+
+// With UseCompressedClassPointers, store ZR with str_w into the klass gap
+// field of the object addressed by 'dst'; otherwise emit nothing.
+void MacroAssembler::store_klass_gap(Register dst) {
+  if (UseCompressedClassPointers) {
+    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+  }
+}
+
+#endif // AARCH64
+
+
+// Load a heap oop from 'src' into 'dst'. On AArch64 with UseCompressedOops
+// the value is read with ldr_w and decoded in place; otherwise a plain ldr.
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    ldr_w(dst, src);
+    decode_heap_oop(dst);
+    return;
+  }
+#endif // AARCH64
+  ldr(dst, src);
+}
+
+// Blows src and flags.
+// Store the heap oop in 'src' to 'dst'. On AArch64 with UseCompressedOops
+// 'src' is encoded in place before the str_w, so 'dst' must not be formed
+// from 'src' (asserted).
+void MacroAssembler::store_heap_oop(Register src, Address dst) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    str_w(src, dst);
+    return;
+  }
+#endif // AARCH64
+  str(src, dst);
+}
+
+// Store 'src' to 'dst' without encoding it: str_w on AArch64 with
+// UseCompressedOops, plain str otherwise.
+// NOTE(review): 'src' presumably holds zero (a null oop) - confirm at callers.
+void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    str_w(src, dst);
+    return;
+  }
+#endif // AARCH64
+  str(src, dst);
+}
+
+
+#ifdef AARCH64
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+// Encode the (possibly null) oop in 'src' into 'dst'. Uses tst, so the
+// condition flags are modified when a narrow oop base is in use.
+void MacroAssembler::encode_heap_oop(Register dst, Register src) {
+  // This code pattern is matched in NativeIntruction::skip_encode_heap_oop.
+  // Update it at modifications.
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    // No base: only the shift (or a plain register move) is needed.
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_oop_shift());
+    } else if (dst != src) {
+      mov(dst, src);
+    }
+  } else {
+    // A null src is replaced by Rheap_base first, so that subtracting the
+    // base yields 0 for null.
+    tst(src, src);
+    csel(dst, Rheap_base, src, eq);
+    sub(dst, dst, Rheap_base);
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_oop_shift());
+    }
+  }
+}
+
+// Same algorithm as oop.inline.hpp decode_heap_oop.
+// Decode the (possibly null) narrow oop in 'src' into 'dst'. Uses tst, so the
+// condition flags are modified when a narrow oop base is in use.
+void MacroAssembler::decode_heap_oop(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  if (Universe::narrow_oop_base() != NULL) {
+    // Compute base + (src << shift), then select ZR instead when src was
+    // zero so that null decodes to null.
+    tst(src, src);
+    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+    csel(dst, dst, ZR, ne);
+  } else {
+    _lsl(dst, src, Universe::narrow_oop_shift());
+  }
+  verify_oop(dst);
+}
+
+#ifdef COMPILER2
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
+// must be changed.
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+  // Encode the oop in 'src' into 'dst' without any null handling: subtract
+  // Rheap_base when a narrow oop base is in use, then shift right by the
+  // narrow oop shift. No tst/csel is emitted here.
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_oop_shift());
+    } else if (dst != src) {
+      mov(dst, src);
+    }
+  } else {
+    sub(dst, src, Rheap_base);
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_oop_shift());
+    }
+  }
+}
+
+// Same algorithm as oop.inline.hpp decode_heap_oop.
+// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
+// must be changed.
+// Decodes the narrow oop in 'src' into 'dst' without any null handling:
+// Rheap_base + (src << shift) when a narrow oop base is in use, otherwise
+// just the shift.
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  if (Universe::narrow_oop_base() != NULL) {
+    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+  } else {
+    _lsl(dst, src, Universe::narrow_oop_shift());
+  }
+  verify_oop(dst);
+}
+
+// Materialize the narrow (encoded) form of klass 'k' in 'dst' with a
+// movz/movk pair, recording a metadata relocation for the klass index
+// obtained from the OopRecorder.
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+  assert(UseCompressedClassPointers, "should only be used for compressed header");
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+
+  // Relocation with special format (see relocInfo_arm.hpp).
+  relocate(rspec);
+  narrowKlass encoded_k = Klass::encode_klass(k);
+  // Low 16 bits via movz, next 16 bits via movk.
+  movz(dst, encoded_k & 0xffff, 0);
+  movk(dst, (encoded_k >> 16) & 0xffff, 16);
+}
+
+// Emit a movz/movk pair for a narrow oop, recording an oop relocation for
+// the index of 'obj' obtained from the OopRecorder. Both immediates are
+// emitted as 0xffff.
+// NOTE(review): 0xffff is presumably a placeholder that is patched through
+// the oop relocation later - confirm in the relocation code.
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert(UseCompressedOops, "should only be used for compressed header");
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+  relocate(rspec);
+  movz(dst, 0xffff, 0);
+  movk(dst, 0xffff, 16);
+}
+
+#endif // COMPILER2
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+// Encodes the klass pointer in 'r' in place. When a narrow klass base is in
+// use, Rheap_base is borrowed to hold it and is reloaded by
+// reinit_heapbase() at the end.
+void MacroAssembler::encode_klass_not_null(Register r) {
+  if (Universe::narrow_klass_base() != NULL) {
+    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
+    mov_slow(Rheap_base, Universe::narrow_klass_base());
+    sub(r, r, Rheap_base);
+  }
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    _lsr(r, r, Universe::narrow_klass_shift());
+  }
+  if (Universe::narrow_klass_base() != NULL) {
+    reinit_heapbase();
+  }
+}
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  // Two-register form: encodes the klass pointer in 'src' into 'dst'.
+  // When dst == src this falls back to the in-place single-register form.
+  // Otherwise 'dst' itself holds the narrow klass base temporarily, so
+  // Rheap_base is not touched.
+  if (dst == src) {
+    encode_klass_not_null(src);
+    return;
+  }
+  if (Universe::narrow_klass_base() != NULL) {
+    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
+    sub(dst, src, dst);
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_klass_shift());
+    }
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_klass_shift());
+    } else {
+      mov(dst, src);
+    }
+  }
+}
+
+// Function instr_count_for_decode_klass_not_null() counts the instructions
+// generated by decode_klass_not_null(register r) and reinit_heapbase(),
+// when (Universe::heap() != NULL). Hence, if the instructions they
+// generate change, then this method needs to be updated.
+// Returns: with a narrow klass base, the two mov_slow lengths plus one add;
+// without a base, 1 if a shift is needed and 0 otherwise.
+int MacroAssembler::instr_count_for_decode_klass_not_null() {
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_klass_base() != NULL) {
+    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
+      1 + // add
+      instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+// Decodes the narrow klass in 'r' in place. The number of bytes emitted is
+// checked against instr_count_for_decode_klass_not_null() at the end.
+void MacroAssembler::decode_klass_not_null(Register r) {
+  int off = offset();
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
+  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_base() != NULL) {
+    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+    mov_slow(Rheap_base, Universe::narrow_klass_base());
+    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
+    reinit_heapbase();
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsl(r, r, Universe::narrow_klass_shift());
+    }
+  }
+  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+// Two-register form: decodes the narrow klass in 'src' into 'dst'. When
+// src == dst this falls back to the in-place form; otherwise 'dst' holds the
+// narrow klass base temporarily and Rheap_base is not touched.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  if (src == dst) {
+    decode_klass_not_null(src);
+    return;
+  }
+
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
+  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_base() != NULL) {
+    mov_slow(dst, Universe::narrow_klass_base());
+    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
+  } else {
+    _lsl(dst, src, Universe::narrow_klass_shift());
+  }
+}
+
+
+// Reload Rheap_base with the narrow pointers base when compressed oops or
+// compressed class pointers are enabled; emits nothing otherwise. Once the
+// heap exists the value is emitted as an immediate (mov_slow); before that it
+// is loaded at run time from Universe::narrow_ptrs_base_addr().
+void MacroAssembler::reinit_heapbase() {
+  if (UseCompressedOops || UseCompressedClassPointers) {
+    if (Universe::heap() != NULL) {
+      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
+    } else {
+      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
+    }
+  }
+}
+
+#ifdef ASSERT
+// Debug check, emitted only when CheckCompressedOops is set: compare
+// Rheap_base with Universe::narrow_ptrs_base() and stop(msg) on mismatch.
+// Rtemp and the NZCV flags are saved on the stack and restored around the
+// comparison.
+void MacroAssembler::verify_heapbase(const char* msg) {
+  // This code pattern is matched in NativeIntruction::skip_verify_heapbase.
+  // Update it at modifications.
+  assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (CheckCompressedOops) {
+    Label ok;
+    // Store Rthread into the thread's in_top_frame_unsafe_section field for
+    // the duration of the check; ZR is stored back at the end.
+    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
+    raw_push(Rtemp, ZR);
+    // Save the flags in the stack slot at SP + 1 * wordSize.
+    mrs(Rtemp, Assembler::SysReg_NZCV);
+    str(Rtemp, Address(SP, 1 * wordSize));
+    mov_slow(Rtemp, Universe::narrow_ptrs_base());
+    cmp(Rheap_base, Rtemp);
+    b(ok, eq);
+    stop(msg);
+    bind(ok);
+    // Restore the flags, then Rtemp.
+    ldr(Rtemp, Address(SP, 1 * wordSize));
+    msr(Assembler::SysReg_NZCV, Rtemp);
+    raw_pop(Rtemp, ZR);
+    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
+  }
+}
+#endif // ASSERT
+
+#endif // AARCH64
+
+#ifdef COMPILER2
+// Emit the inline fast path for locking object 'Roop' using the lock record
+// 'Rbox'.
+//   Rscratch  - temporary.
+//   Rscratch2 - temporary; used as Rmark, the loaded mark word.
+//   Rscratch3 - (AArch64 only) extra temporary handed to biased_locking_enter.
+// Roop and Rbox must each differ from Rscratch and from Rscratch2 (asserted).
+// NOTE(review): the outcome is presumably reported through the condition
+// flags at 'done' (the AArch64 path exits "with failure" on 'ne') - confirm
+// against the C2 rule that uses this.
+void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark = Rscratch2;
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label fast_lock, done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    // 'failed' is bound immediately after, so a biased-locking miss simply
+    // continues with the code below.
+    Label failed;
+#ifdef AARCH64
+    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
+#else
+    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
+#endif
+    bind(failed);
+  }
+
+  // Load the mark word; if the unlocked bit is set go try the CAS.
+  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
+  tst(Rmark, markOopDesc::unlocked_value);
+  b(fast_lock, ne);
+
+  // Check for recursive lock
+  // See comments in InterpreterMacroAssembler::lock_object for
+  // explanations on the fast recursive locking check.
+#ifdef AARCH64
+  // Mask (mark - SP) with 3 - page_size: a zero result stores zero as the
+  // displaced header; a non-zero result branches to done.
+  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+  Assembler::LogicalImmediate imm(mask, false);
+  mov(Rscratch, SP);
+  sub(Rscratch, Rmark, Rscratch);
+  ands(Rscratch, Rscratch, imm);
+  b(done, ne); // exit with failure
+  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
+  b(done);
+
+#else
+  // -1- test low 2 bits
+  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
+  // -2- test (hdr - SP) if the low two bits are 0
+  sub(Rscratch, Rmark, SP, eq);
+  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
+  // If still 'eq' then recursive locking OK
+  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
+  b(done);
+#endif
+
+  bind(fast_lock);
+  // Save the mark word as the displaced header in the lock record, then try
+  // to install the lock with a single CAS attempt.
+  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+  bind(done);
+
+}
+
+// Emit the inline fast path for unlocking object 'Roop' whose lock record is
+// 'Rbox'. Register parameters and the distinctness requirements are the same
+// as for fast_lock; Rscratch3 is not used in this body.
+void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark = Rscratch2;
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(Roop, Rscratch, done);
+  }
+
+  // Load the displaced header saved in the lock record.
+  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+  // If hdr is NULL, we've got recursive locking and there's nothing more to do
+  cmp(Rmark, 0);
+  b(done, eq);
+
+  // Restore the object header
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+  bind(done);
+
+}
+#endif // COMPILER2
+
--- /dev/null 2016-08-24 
15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/macroAssembler_arm.hpp 2016-12-02 11:21:55.745870426 -0500 @@ -0,0 +1,1390 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_MACROASSEMBLER_ARM_HPP +#define CPU_ARM_VM_MACROASSEMBLER_ARM_HPP + +#include "code/relocInfo.hpp" +#include "code/relocInfo_ext.hpp" + +class BiasedLockingCounters; + +// Introduced AddressLiteral and its subclasses to ease portability from +// x86 and avoid relocation issues +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. 
+ bool _is_lval; + + address _target; + + private: + static relocInfo::relocType reloc_for_target(address target) { + // Used for ExternalAddress or when the type is not specified + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; + } + + void set_rspec(relocInfo::relocType rtype); + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + set_rspec(rtype); + } + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + + AddressLiteral(address target) { + _is_lval = false; + _target = target; + set_rspec(reloc_for_target(target)); + } + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + friend class InlinedAddress; +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target) {} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// Inlined constants, for use with ldr_literal / bind_literal +// Note: InlinedInteger not supported (use move_slow(Register,int[,cond])) +class InlinedLiteral: StackObj { + public: + Label 
label; // need to be public for direct access with & + InlinedLiteral() { + } +}; + +class InlinedMetadata: public InlinedLiteral { + private: + Metadata *_data; + + public: + InlinedMetadata(Metadata *data): InlinedLiteral() { + _data = data; + } + Metadata *data() { return _data; } +}; + +// Currently unused +// class InlinedOop: public InlinedLiteral { +// private: +// jobject _jobject; +// +// public: +// InlinedOop(jobject target): InlinedLiteral() { +// _jobject = target; +// } +// jobject jobject() { return _jobject; } +// }; + +class InlinedAddress: public InlinedLiteral { + private: + AddressLiteral _literal; + + public: + + InlinedAddress(jobject object): InlinedLiteral(), _literal((address)object, relocInfo::oop_type) { + ShouldNotReachHere(); // use mov_oop (or implement InlinedOop) + } + + InlinedAddress(Metadata *data): InlinedLiteral(), _literal((address)data, relocInfo::metadata_type) { + ShouldNotReachHere(); // use InlinedMetadata or mov_metadata + } + + InlinedAddress(address target, const RelocationHolder &rspec): InlinedLiteral(), _literal(target, rspec) { + assert(rspec.type() != relocInfo::oop_type, "Do not use InlinedAddress for oops"); + assert(rspec.type() != relocInfo::metadata_type, "Do not use InlinedAddress for metadatas"); + } + + InlinedAddress(address target, relocInfo::relocType rtype): InlinedLiteral(), _literal(target, rtype) { + assert(rtype != relocInfo::oop_type, "Do not use InlinedAddress for oops"); + assert(rtype != relocInfo::metadata_type, "Do not use InlinedAddress for metadatas"); + } + + // Note: default is relocInfo::none for InlinedAddress + InlinedAddress(address target): InlinedLiteral(), _literal(target, relocInfo::none) { + } + + address target() { return _literal.target(); } + + const RelocationHolder& rspec() const { return _literal.rspec(); } +}; + +class InlinedString: public InlinedLiteral { + private: + const char* _msg; + + public: + InlinedString(const char* msg): InlinedLiteral() { + _msg = msg; + } + 
const char* msg() { return _msg; } +}; + +class MacroAssembler: public Assembler { +protected: + + // Support for VM calls + // + + // This is the base routine called by the different versions of call_VM_leaf. + void call_VM_leaf_helper(address entry_point, int number_of_arguments); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + virtual void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe() {} + virtual void check_and_handle_earlyret() {} + +public: + + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // By default, we do not need relocation information for non + // patchable absolute addresses. However, when needed by some + // extensions, ignore_non_patchable_relocations can be modified, + // returning false to preserve all relocation information. + inline bool ignore_non_patchable_relocations() { return true; } + + // Initially added to the Assembler interface as a pure virtual: + // RegisterConstant delayed_value(..) 
+ // for: + // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) + // this was subsequently modified to its present name and return type + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); + +#ifdef AARCH64 +# define NOT_IMPLEMENTED() unimplemented("NYI at " __FILE__ ":" XSTR(__LINE__)) +# define NOT_TESTED() warn("Not tested at " __FILE__ ":" XSTR(__LINE__)) +#endif + + void align(int modulus); + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM methods. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + // The following methods are required by templateTable.cpp, + // but not used on ARM. 
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + // Note: The super_call_VM calls are not used on ARM + + // Raw call, without saving/restoring registers, exception handling, etc. + // Mainly used from various stubs. + // Note: if 'save_R9_if_scratched' is true, call_VM may on some + // platforms save values on the stack. Set it to false (and handle + // R9 in the callers) if the top of the stack must not be modified + // by call_VM. + void call_VM(address entry_point, bool save_R9_if_scratched); + + void call_VM_leaf(address entry_point); + void call_VM_leaf(address entry_point, Register arg_1); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + + void get_vm_result(Register oop_result, Register tmp); + void get_vm_result_2(Register metadata_result, Register tmp); + + // Always sets/resets sp, which default to SP if (last_sp == noreg) + // Optionally sets/resets fp (use noreg to avoid setting it) + // Always sets/resets pc on AArch64; optionally sets/resets pc on 32-bit ARM depending on save_last_java_pc flag + // Note: when saving PC, set_last_Java_frame returns PC's offset in the code section + // (for oop_maps offset computation) + int set_last_Java_frame(Register last_sp, Register last_fp, bool save_last_java_pc, Register tmp); + 
void reset_last_Java_frame(Register tmp); + // status set in set_last_Java_frame for reset_last_Java_frame + bool _fp_saved; + bool _pc_saved; + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) __ stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) __ block_comment(error); __ stop(error) +#endif + + void lookup_virtual_method(Register recv_klass, + Register vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // No registers are killed, except temp_regs. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp_reg2, + Label* L_success, + Label* L_failure, + Label* L_slow_path); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // temp_reg3 can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes: + // - condition codes will be Z on success, NZ on failure. + // - temp_reg will be 0 on success, non-0 on failure + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp_reg2, + Register temp_reg3, // auto assigned if noreg + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // temp_reg3 can be noreg, if no temps are available. It is used only on slow path. + // Falls through on failure. 
+ void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp_reg2, + Register temp_reg3, // auto assigned on slow path if noreg + Label& L_success); + + // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same. + Address receiver_argument_address(Register params_base, Register params_count, Register tmp); + + void _verify_oop(Register reg, const char* s, const char* file, int line); + void _verify_oop_addr(Address addr, const char * s, const char* file, int line); + + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} + +#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__) +#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop ", __FILE__, __LINE__) +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + void null_check(Register reg, Register tmp, int offset = -1); + inline void null_check(Register reg) { null_check(reg, noreg, -1); } // for C1 lir_null_check + + // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
+ void eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, + RegisterOrConstant size_expression, Label& slow_case); + void tlab_allocate(Register obj, Register obj_end, Register tmp1, + RegisterOrConstant size_expression, Label& slow_case); + + void tlab_refill(Register top, Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Label& try_eden, Label& slow_case); + void zero_memory(Register start, Register end, Register tmp); + + void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp); + + static bool needs_explicit_null_check(intptr_t offset); + + void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp); + void arm_stack_overflow_check(Register Rsize, Register tmp); + + void bang_stack_with_offset(int offset) { + ShouldNotReachHere(); + } + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg must be supplied. + // tmp_reg must be supplied. + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. If slow_case is NULL, then leaves condition + // codes set (for C2's Fast_Lock node) and jumps to done label. + // Falls through for the fast locking attempt. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. 
+ // Notes: + // - swap_reg and tmp_reg are scratched + // - Rtemp was (implicitly) scratched and can now be specified as the tmp2 + int biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Register tmp2, + Label& done, Label& slow_case, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done); + + // Building block for CAS cases of biased locking: makes CAS and records statistics. + // Optional slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. + void biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, + Register tmp, Label& slow_case, int* counter_addr); + +#ifndef AARCH64 + void nop() { + mov(R0, R0); + } + + void push(Register rd, AsmCondition cond = al) { + assert(rd != SP, "unpredictable instruction"); + str(rd, Address(SP, -wordSize, pre_indexed), cond); + } + + void push(RegisterSet reg_set, AsmCondition cond = al) { + assert(!reg_set.contains(SP), "unpredictable instruction"); + stmdb(SP, reg_set, writeback, cond); + } + + void pop(Register rd, AsmCondition cond = al) { + assert(rd != SP, "unpredictable instruction"); + ldr(rd, Address(SP, wordSize, post_indexed), cond); + } + + void pop(RegisterSet reg_set, AsmCondition cond = al) { + assert(!reg_set.contains(SP), "unpredictable instruction"); + ldmia(SP, reg_set, writeback, cond); + } + + void fpushd(FloatRegister fd, AsmCondition cond = al) { + fstmdbd(SP, FloatRegisterSet(fd), writeback, cond); + } + + void fpushs(FloatRegister fd, AsmCondition cond = al) { + fstmdbs(SP, FloatRegisterSet(fd), writeback, cond); + } + + void fpopd(FloatRegister fd, AsmCondition cond = al) { + fldmiad(SP, FloatRegisterSet(fd), writeback, cond); + } + + void fpops(FloatRegister fd, AsmCondition cond = al) { + fldmias(SP, FloatRegisterSet(fd), writeback, cond); + } +#endif // !AARCH64 + + // Order access primitives + enum 
Membar_mask_bits { + StoreStore = 1 << 3, + LoadStore = 1 << 2, + StoreLoad = 1 << 1, + LoadLoad = 1 << 0 + }; + +#ifdef AARCH64 + // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM + void membar(Membar_mask_bits order_constraint, Register tmp = noreg); +#else + void membar(Membar_mask_bits mask, + Register tmp, + bool preserve_flags = true, + Register load_tgt = noreg); +#endif + + void breakpoint(AsmCondition cond = al); + void stop(const char* msg); + // prints msg and continues + void warn(const char* msg); + void unimplemented(const char* what = ""); + void should_not_reach_here() { stop("should not reach here"); } + static void debug(const char* msg, const intx* registers); + + // Create a walkable frame to help tracking down who called this code. + // Returns the frame size in words. + int should_not_call_this() { + raw_push(FP, LR); + should_not_reach_here(); + flush(); + return 2; // frame_size_in_words (FP+LR) + } + + int save_all_registers(); + void restore_all_registers(); + int save_caller_save_registers(); + void restore_caller_save_registers(); + + void add_rc(Register dst, Register arg1, RegisterOrConstant arg2); + + // add_slow and mov_slow are used to manipulate offsets larger than 1024, + // these functions are not expected to handle all possible constants, + // only those that can really occur during compilation + void add_slow(Register rd, Register rn, int c); + void sub_slow(Register rd, Register rn, int c); + +#ifdef AARCH64 + static int mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm /* optional */); +#endif + + void mov_slow(Register rd, intptr_t c NOT_AARCH64_ARG(AsmCondition cond = al)); + void mov_slow(Register rd, const char *string); + void mov_slow(Register rd, address addr); + + void patchable_mov_oop(Register rd, jobject o, int oop_index) { + mov_oop(rd, o, oop_index AARCH64_ONLY_ARG(true)); + } + void mov_oop(Register rd, jobject o, int index = 0 + 
AARCH64_ONLY_ARG(bool patchable = false) + NOT_AARCH64_ARG(AsmCondition cond = al)); + + + void patchable_mov_metadata(Register rd, Metadata* o, int index) { + mov_metadata(rd, o, index AARCH64_ONLY_ARG(true)); + } + void mov_metadata(Register rd, Metadata* o, int index = 0 AARCH64_ONLY_ARG(bool patchable = false)); + + void mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond = al)); + void mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond = al)); + +#ifdef AARCH64 + int mov_pc_to(Register rd) { + Label L; + adr(rd, L); + bind(L); + return offset(); + } +#endif + + // Note: this variant of mov_address assumes the address moves with + // the code. Do *not* implement it with non-relocated instructions, + // unless PC-relative. +#ifdef AARCH64 + void mov_relative_address(Register rd, address addr) { + adr(rd, addr); + } +#else + void mov_relative_address(Register rd, address addr, AsmCondition cond = al) { + int offset = addr - pc() - 8; + assert((offset & 3) == 0, "bad alignment"); + if (offset >= 0) { + assert(AsmOperand::is_rotated_imm(offset), "addr too far"); + add(rd, PC, offset, cond); + } else { + assert(AsmOperand::is_rotated_imm(-offset), "addr too far"); + sub(rd, PC, -offset, cond); + } + } +#endif // AARCH64 + + // Runtime address that may vary from one execution to another. The + // symbolic_reference describes what the address is, allowing + // the address to be resolved in a different execution context. + // Warning: do not implement as a PC relative address. + void mov_address(Register rd, address addr, symbolic_Relocation::symbolic_reference t) { + mov_address(rd, addr, RelocationHolder::none); + } + + // rspec can be RelocationHolder::none (for ignored symbolic_Relocation). + // In that case, the address is absolute and the generated code need + // not be relocable. 
+ void mov_address(Register rd, address addr, RelocationHolder const& rspec) { + assert(rspec.type() != relocInfo::runtime_call_type, "do not use mov_address for runtime calls"); + assert(rspec.type() != relocInfo::static_call_type, "do not use mov_address for relocable calls"); + if (rspec.type() == relocInfo::none) { + // absolute address, relocation not needed + mov_slow(rd, (intptr_t)addr); + return; + } +#ifndef AARCH64 + if (VM_Version::supports_movw()) { + relocate(rspec); + int c = (int)addr; + movw(rd, c & 0xffff); + if ((unsigned int)c >> 16) { + movt(rd, (unsigned int)c >> 16); + } + return; + } +#endif + Label skip_literal; + InlinedAddress addr_literal(addr, rspec); + ldr_literal(rd, addr_literal); + b(skip_literal); + bind_literal(addr_literal); + // AARCH64 WARNING: because of alignment padding, extra padding + // may be required to get a consistent size for C2, or rules must + // overestimate the size (see MachEpilogNode::size) + bind(skip_literal); + } + + // Note: Do not define mov_address for a Label + // + // Loads from addresses potentially within the code are now handled by + // InlinedLiteral subclasses (to allow more flexibility on how the + // ldr_literal is performed). + + void ldr_literal(Register rd, InlinedAddress& L) { + assert(L.rspec().type() != relocInfo::runtime_call_type, "avoid ldr_literal for calls"); + assert(L.rspec().type() != relocInfo::static_call_type, "avoid ldr_literal for calls"); + relocate(L.rspec()); +#ifdef AARCH64 + ldr(rd, target(L.label)); +#else + ldr(rd, Address(PC, target(L.label) - pc() - 8)); +#endif + } + + void ldr_literal(Register rd, InlinedString& L) { + const char* msg = L.msg(); + if (code()->consts()->contains((address)msg)) { + // string address moves with the code +#ifdef AARCH64 + ldr(rd, (address)msg); +#else + ldr(rd, Address(PC, ((address)msg) - pc() - 8)); +#endif + return; + } + // Warning: use external strings with care. They are not relocated + // if the code moves. 
If needed, use code_string to move them + // to the consts section. +#ifdef AARCH64 + ldr(rd, target(L.label)); +#else + ldr(rd, Address(PC, target(L.label) - pc() - 8)); +#endif + } + + void ldr_literal(Register rd, InlinedMetadata& L) { + // relocation done in the bind_literal for metadatas +#ifdef AARCH64 + ldr(rd, target(L.label)); +#else + ldr(rd, Address(PC, target(L.label) - pc() - 8)); +#endif + } + + void bind_literal(InlinedAddress& L) { + AARCH64_ONLY(align(wordSize)); + bind(L.label); + assert(L.rspec().type() != relocInfo::metadata_type, "Must use InlinedMetadata"); + // We currently do not use oop 'bound' literals. + // If the code evolves and the following assert is triggered, + // we need to implement InlinedOop (see InlinedMetadata). + assert(L.rspec().type() != relocInfo::oop_type, "Inlined oops not supported"); + // Note: relocation is handled by relocate calls in ldr_literal + AbstractAssembler::emit_address((address)L.target()); + } + + void bind_literal(InlinedString& L) { + const char* msg = L.msg(); + if (code()->consts()->contains((address)msg)) { + // The Label should not be used; avoid binding it + // to detect errors. 
+ return; + } + AARCH64_ONLY(align(wordSize)); + bind(L.label); + AbstractAssembler::emit_address((address)L.msg()); + } + + void bind_literal(InlinedMetadata& L) { + AARCH64_ONLY(align(wordSize)); + bind(L.label); + relocate(metadata_Relocation::spec_for_immediate()); + AbstractAssembler::emit_address((address)L.data()); + } + + void load_mirror(Register mirror, Register method, Register tmp); + + // Porting layer between 32-bit ARM and AArch64 + +#define COMMON_INSTR_1(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg_type) \ + void common_mnemonic(arg_type arg) { \ + AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg); \ + } + +#define COMMON_INSTR_2(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type) \ + void common_mnemonic(arg1_type arg1, arg2_type arg2) { \ + AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2); \ + } + +#define COMMON_INSTR_3(common_mnemonic, aarch64_mnemonic, arm32_mnemonic, arg1_type, arg2_type, arg3_type) \ + void common_mnemonic(arg1_type arg1, arg2_type arg2, arg3_type arg3) { \ + AARCH64_ONLY(aarch64_mnemonic) NOT_AARCH64(arm32_mnemonic) (arg1, arg2, arg3); \ + } + + COMMON_INSTR_1(jump, br, bx, Register) + COMMON_INSTR_1(call, blr, blx, Register) + + COMMON_INSTR_2(cbz_32, cbz_w, cbz, Register, Label&) + COMMON_INSTR_2(cbnz_32, cbnz_w, cbnz, Register, Label&) + + COMMON_INSTR_2(ldr_u32, ldr_w, ldr, Register, Address) + COMMON_INSTR_2(ldr_s32, ldrsw, ldr, Register, Address) + COMMON_INSTR_2(str_32, str_w, str, Register, Address) + + COMMON_INSTR_2(mvn_32, mvn_w, mvn, Register, Register) + COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, Register) + COMMON_INSTR_2(neg_32, neg_w, neg, Register, Register) + COMMON_INSTR_2(clz_32, clz_w, clz, Register, Register) + COMMON_INSTR_2(rbit_32, rbit_w, rbit, Register, Register) + + COMMON_INSTR_2(cmp_32, cmp_w, cmp, Register, int) + COMMON_INSTR_2(cmn_32, cmn_w, cmn, Register, int) + + COMMON_INSTR_3(add_32, add_w, add, Register, Register, 
Register) + COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, Register) + COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, Register) + COMMON_INSTR_3(mul_32, mul_w, mul, Register, Register, Register) + COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, Register) + COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, Register) + COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, Register) + + COMMON_INSTR_3(add_32, add_w, add, Register, Register, AsmOperand) + COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, AsmOperand) + COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, AsmOperand) + COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, AsmOperand) + COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, AsmOperand) + + + COMMON_INSTR_3(add_32, add_w, add, Register, Register, int) + COMMON_INSTR_3(adds_32, adds_w, adds, Register, Register, int) + COMMON_INSTR_3(sub_32, sub_w, sub, Register, Register, int) + COMMON_INSTR_3(subs_32, subs_w, subs, Register, Register, int) + + COMMON_INSTR_2(tst_32, tst_w, tst, Register, unsigned int) + COMMON_INSTR_2(tst_32, tst_w, tst, Register, AsmOperand) + + COMMON_INSTR_3(and_32, andr_w, andr, Register, Register, uint) + COMMON_INSTR_3(orr_32, orr_w, orr, Register, Register, uint) + COMMON_INSTR_3(eor_32, eor_w, eor, Register, Register, uint) + + COMMON_INSTR_1(cmp_zero_float, fcmp0_s, fcmpzs, FloatRegister) + COMMON_INSTR_1(cmp_zero_double, fcmp0_d, fcmpzd, FloatRegister) + + COMMON_INSTR_2(ldr_float, ldr_s, flds, FloatRegister, Address) + COMMON_INSTR_2(str_float, str_s, fsts, FloatRegister, Address) + COMMON_INSTR_2(mov_float, fmov_s, fcpys, FloatRegister, FloatRegister) + COMMON_INSTR_2(neg_float, fneg_s, fnegs, FloatRegister, FloatRegister) + COMMON_INSTR_2(abs_float, fabs_s, fabss, FloatRegister, FloatRegister) + COMMON_INSTR_2(sqrt_float, fsqrt_s, fsqrts, FloatRegister, FloatRegister) + COMMON_INSTR_2(cmp_float, fcmp_s, fcmps, FloatRegister, FloatRegister) + + 
COMMON_INSTR_3(add_float, fadd_s, fadds, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(sub_float, fsub_s, fsubs, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(mul_float, fmul_s, fmuls, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(div_float, fdiv_s, fdivs, FloatRegister, FloatRegister, FloatRegister) + + COMMON_INSTR_2(ldr_double, ldr_d, fldd, FloatRegister, Address) + COMMON_INSTR_2(str_double, str_d, fstd, FloatRegister, Address) + COMMON_INSTR_2(mov_double, fmov_d, fcpyd, FloatRegister, FloatRegister) + COMMON_INSTR_2(neg_double, fneg_d, fnegd, FloatRegister, FloatRegister) + COMMON_INSTR_2(cmp_double, fcmp_d, fcmpd, FloatRegister, FloatRegister) + COMMON_INSTR_2(abs_double, fabs_d, fabsd, FloatRegister, FloatRegister) + COMMON_INSTR_2(sqrt_double, fsqrt_d, fsqrtd, FloatRegister, FloatRegister) + + COMMON_INSTR_3(add_double, fadd_d, faddd, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(sub_double, fsub_d, fsubd, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(mul_double, fmul_d, fmuld, FloatRegister, FloatRegister, FloatRegister) + COMMON_INSTR_3(div_double, fdiv_d, fdivd, FloatRegister, FloatRegister, FloatRegister) + + COMMON_INSTR_2(convert_f2d, fcvt_ds, fcvtds, FloatRegister, FloatRegister) + COMMON_INSTR_2(convert_d2f, fcvt_sd, fcvtsd, FloatRegister, FloatRegister) + + COMMON_INSTR_2(mov_fpr2gpr_float, fmov_ws, fmrs, Register, FloatRegister) + +#undef COMMON_INSTR_1 +#undef COMMON_INSTR_2 +#undef COMMON_INSTR_3 + + +#ifdef AARCH64 + + void mov(Register dst, Register src, AsmCondition cond) { + if (cond == al) { + mov(dst, src); + } else { + csel(dst, src, dst, cond); + } + } + + // Propagate other overloaded "mov" methods from Assembler. 
+ void mov(Register dst, Register src) { Assembler::mov(dst, src); } + void mov(Register rd, int imm) { Assembler::mov(rd, imm); } + + // Conditional move of a small immediate: when cond holds dst becomes imm, + // otherwise dst keeps its value. + // Supported immediates are 0 (conditional move from ZR), 1 (csinc) and -1 (csinv); + // the assert accepts exactly the values the branches below handle, any other + // immediate is fatal. + void mov(Register dst, int imm, AsmCondition cond) { + assert(imm == 0 || imm == 1 || imm == -1, ""); + if (imm == 0) { + mov(dst, ZR, cond); + } else if (imm == 1) { + csinc(dst, dst, ZR, inverse(cond)); + } else if (imm == -1) { + csinv(dst, dst, ZR, inverse(cond)); + } else { + fatal("illegal mov(R%d,%d,cond)", dst->encoding(), imm); + } + } + + void movs(Register dst, Register src) { adds(dst, src, 0); } + +#else // AARCH64 + + void tbz(Register rt, int bit, Label& L) { + assert(0 <= bit && bit < BitsPerWord, "bit number is out of range"); + tst(rt, 1 << bit); + b(L, eq); + } + + void tbnz(Register rt, int bit, Label& L) { + assert(0 <= bit && bit < BitsPerWord, "bit number is out of range"); + tst(rt, 1 << bit); + b(L, ne); + } + + void cbz(Register rt, Label& L) { + cmp(rt, 0); + b(L, eq); + } + + void cbz(Register rt, address target) { + cmp(rt, 0); + b(target, eq); + } + + void cbnz(Register rt, Label& L) { + cmp(rt, 0); + b(L, ne); + } + + void ret(Register dst = LR) { + bx(dst); + } + +#endif // AARCH64 + + Register zero_register(Register tmp) { +#ifdef AARCH64 + return ZR; +#else + mov(tmp, 0); + return tmp; +#endif + } + + void logical_shift_left(Register dst, Register src, int shift) { +#ifdef AARCH64 + _lsl(dst, src, shift); +#else + mov(dst, AsmOperand(src, lsl, shift)); +#endif + } + + void logical_shift_left_32(Register dst, Register src, int shift) { +#ifdef AARCH64 + _lsl_w(dst, src, shift); +#else + mov(dst, AsmOperand(src, lsl, shift)); +#endif + } + + void logical_shift_right(Register dst, Register src, int shift) { +#ifdef AARCH64 + _lsr(dst, src, shift); +#else + mov(dst, AsmOperand(src, lsr, shift)); +#endif + } + + void arith_shift_right(Register dst, Register src, int shift) { +#ifdef AARCH64 + _asr(dst, src, shift); +#else + mov(dst, AsmOperand(src, asr, shift)); +#endif + } + + void asr_32(Register dst, Register src, 
int shift) { +#ifdef AARCH64 + _asr_w(dst, src, shift); +#else + mov(dst, AsmOperand(src, asr, shift)); +#endif + } + + // If cond holds, compares r1 and r2. Otherwise, flags are set so that cond does not hold. + void cond_cmp(Register r1, Register r2, AsmCondition cond) { +#ifdef AARCH64 + ccmp(r1, r2, flags_for_condition(inverse(cond)), cond); +#else + cmp(r1, r2, cond); +#endif + } + + // If cond holds, compares r and imm. Otherwise, flags are set so that cond does not hold. + void cond_cmp(Register r, int imm, AsmCondition cond) { +#ifdef AARCH64 + ccmp(r, imm, flags_for_condition(inverse(cond)), cond); +#else + cmp(r, imm, cond); +#endif + } + + // Clears the low bits of src selected by (align - 1) and stores the result in dst; + // align must be a power of 2. + void align_reg(Register dst, Register src, int align) { + assert (is_power_of_2(align), "should be"); +#ifdef AARCH64 + andr(dst, src, ~(uintx)(align-1)); +#else + bic(dst, src, align-1); +#endif + } + + void prefetch_read(Address addr) { +#ifdef AARCH64 + prfm(pldl1keep, addr); +#else + pld(addr); +#endif + } + + // Pushes r1 and r2. The 32-bit ARM variant asserts that r1 has a lower encoding than r2. + void raw_push(Register r1, Register r2) { +#ifdef AARCH64 + stp(r1, r2, Address(SP, -2*wordSize, pre_indexed)); +#else + assert(r1->encoding() < r2->encoding(), "should be ordered"); + push(RegisterSet(r1) | RegisterSet(r2)); +#endif + } + + void raw_pop(Register r1, Register r2) { +#ifdef AARCH64 + ldp(r1, r2, Address(SP, 2*wordSize, post_indexed)); +#else + assert(r1->encoding() < r2->encoding(), "should be ordered"); + pop(RegisterSet(r1) | RegisterSet(r2)); +#endif + } + + // Three-register push. On AArch64 this is two pair pushes: (r1, r2), then (r3, ZR). + void raw_push(Register r1, Register r2, Register r3) { +#ifdef AARCH64 + raw_push(r1, r2); + raw_push(r3, ZR); +#else + assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered"); + push(RegisterSet(r1) | RegisterSet(r2) | RegisterSet(r3)); +#endif + } + + // Three-register pop. On AArch64 the pairs are popped in the reverse order of + // raw_push(r1, r2, r3): first (r3, ZR), then (r1, r2). + void raw_pop(Register r1, Register r2, Register r3) { +#ifdef AARCH64 + raw_pop(r3, ZR); + raw_pop(r1, r2); +#else + assert(r1->encoding() < r2->encoding() && r2->encoding() < r3->encoding(), "should be ordered"); + pop(RegisterSet(r1) | RegisterSet(r2) | 
RegisterSet(r3)); +#endif + } + + // Restores registers r1 and r2 previously saved by raw_push(r1, r2, ret_addr) and returns by ret_addr. Clobbers LR. + void raw_pop_and_ret(Register r1, Register r2) { +#ifdef AARCH64 + raw_pop(r1, r2, LR); + ret(); +#else + raw_pop(r1, r2, PC); +#endif + } + + void indirect_jump(Address addr, Register scratch) { +#ifdef AARCH64 + ldr(scratch, addr); + br(scratch); +#else + ldr(PC, addr); +#endif + } + + void indirect_jump(InlinedAddress& literal, Register scratch) { +#ifdef AARCH64 + ldr_literal(scratch, literal); + br(scratch); +#else + ldr_literal(PC, literal); +#endif + } + +#ifndef AARCH64 + void neg(Register dst, Register src) { + rsb(dst, src, 0); + } +#endif + + void branch_if_negative_32(Register r, Label& L) { + // Note about branch_if_negative_32() / branch_if_any_negative_32() implementation for AArch64: + // tbnz is not used instead of tst & b.mi because destination may be out of tbnz range (+-32KB) + // since these methods are used in LIR_Assembler::emit_arraycopy() to jump to stub entry. 
+ tst_32(r, r); + b(L, mi); + } + + void branch_if_any_negative_32(Register r1, Register r2, Register tmp, Label& L) { +#ifdef AARCH64 + orr_32(tmp, r1, r2); + tst_32(tmp, tmp); +#else + orrs(tmp, r1, r2); +#endif + b(L, mi); + } + + void branch_if_any_negative_32(Register r1, Register r2, Register r3, Register tmp, Label& L) { + orr_32(tmp, r1, r2); +#ifdef AARCH64 + orr_32(tmp, tmp, r3); + tst_32(tmp, tmp); +#else + orrs(tmp, tmp, r3); +#endif + b(L, mi); + } + + void add_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) { +#ifdef AARCH64 + add(dst, r1, r2, ex_sxtw, shift); +#else + add(dst, r1, AsmOperand(r2, lsl, shift)); +#endif + } + + void sub_ptr_scaled_int32(Register dst, Register r1, Register r2, int shift) { +#ifdef AARCH64 + sub(dst, r1, r2, ex_sxtw, shift); +#else + sub(dst, r1, AsmOperand(r2, lsl, shift)); +#endif + } + + + // klass oop manipulations if compressed + +#ifdef AARCH64 + void load_klass(Register dst_klass, Register src_oop); +#else + void load_klass(Register dst_klass, Register src_oop, AsmCondition cond = al); +#endif // AARCH64 + + void store_klass(Register src_klass, Register dst_oop); + +#ifdef AARCH64 + void store_klass_gap(Register dst); +#endif // AARCH64 + + // oop manipulations + + void load_heap_oop(Register dst, Address src); + void store_heap_oop(Register src, Address dst); + void store_heap_oop(Address dst, Register src) { + store_heap_oop(src, dst); + } + void store_heap_oop_null(Register src, Address dst); + +#ifdef AARCH64 + void encode_heap_oop(Register dst, Register src); + void encode_heap_oop(Register r) { + encode_heap_oop(r, r); + } + void decode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r) { + decode_heap_oop(r, r); + } + +#ifdef COMPILER2 + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); +#endif + + 
void encode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register r); + void decode_klass_not_null(Register dst, Register src); + + void reinit_heapbase(); + +#ifdef ASSERT + void verify_heapbase(const char* msg); +#endif // ASSERT + + static int instr_count_for_mov_slow(intptr_t c); + static int instr_count_for_mov_slow(address addr); + static int instr_count_for_decode_klass_not_null(); +#endif // AARCH64 + + void ldr_global_ptr(Register reg, address address_of_global); + void ldr_global_s32(Register reg, address address_of_global); + void ldrb_global(Register reg, address address_of_global); + + // address_placeholder_instruction is an invalid instruction and is used + // as a placeholder in code for the address of a label + enum { address_placeholder_instruction = 0xFFFFFFFF }; + + // Emits a word-aligned placeholder for the address of the not-yet-bound label L: + // two 32-bit placeholder words on AArch64, one address-sized word otherwise. + void emit_address(Label& L) { + assert(!L.is_bound(), "otherwise address will not be patched"); + target(L); // creates relocation which will be patched later + + assert ((offset() & (wordSize-1)) == 0, "should be aligned by word size"); + +#ifdef AARCH64 + emit_int32(address_placeholder_instruction); + emit_int32(address_placeholder_instruction); +#else + AbstractAssembler::emit_address((address)address_placeholder_instruction); +#endif + } + + // Branch to an explicit target address; forwards to Assembler::b. + void b(address target, AsmCondition cond = al) { + Assembler::b(target, cond); + } + void b(Label& L, AsmCondition cond = al) { + // internal jumps + Assembler::b(target(L), cond); + } + + void bl(address target NOT_AARCH64_ARG(AsmCondition cond = al)) { + Assembler::bl(target NOT_AARCH64_ARG(cond)); + } + void bl(Label& L NOT_AARCH64_ARG(AsmCondition cond = al)) { + // internal calls + Assembler::bl(target(L) NOT_AARCH64_ARG(cond)); + } + +#ifndef AARCH64 + // Computes the address of label L into dest as PC plus or minus a displacement; + // the displacement is biased by 8 relative to the current code position. + void adr(Register dest, Label& L, AsmCondition cond = al) { + int delta = target(L) - pc() - 8; + if (delta >= 0) { + add(dest, PC, delta, cond); + } else { + sub(dest, PC, -delta, cond); + } + } +#endif // !AARCH64 + + // 
Variable-length jumps and calls. We now distinguish only the + // patchable case from the other cases. Patchable must be + // distinguished from relocatable. Relocatable means the generated code + // containing the jump/call may move. Patchable means that the + // targeted address may be changed later. + + // Non-patchable versions. + // - used only for relocInfo::runtime_call_type and relocInfo::none + // - may use relative or absolute format (do not use relocInfo::none + // if the generated code may move) + // - the implementation takes into account switch to THUMB mode if the + // destination is a THUMB address + // - the implementation supports far targets + // + // To reduce regression risk, scratch still defaults to noreg on + // arm32. This results in patchable instructions. However, if + // patching really matters, the call sites should be modified and + // use patchable_call or patchable_jump. If patching is not required + // and if a register can be clobbered, it should be explicitly + // specified to allow future optimizations. 
+ void jump(address target, + relocInfo::relocType rtype = relocInfo::runtime_call_type, + Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg) +#ifndef AARCH64 + , AsmCondition cond = al +#endif + ); + + void call(address target, + RelocationHolder rspec + NOT_AARCH64_ARG(AsmCondition cond = al)); + + void call(address target, + relocInfo::relocType rtype = relocInfo::runtime_call_type + NOT_AARCH64_ARG(AsmCondition cond = al)) { + call(target, Relocation::spec_simple(rtype) NOT_AARCH64_ARG(cond)); + } + + void jump(AddressLiteral dest) { + jump(dest.target(), dest.reloc()); + } +#ifndef AARCH64 + // Conditional jump overload for 32-bit ARM; passes Rtemp as the scratch register. + void jump(address dest, relocInfo::relocType rtype, AsmCondition cond) { + jump(dest, rtype, Rtemp, cond); + } +#endif + + void call(AddressLiteral dest) { + call(dest.target(), dest.reloc()); + } + + // Patchable version: + // - set_destination can be used to atomically change the target + // + // The targets for patchable_jump and patchable_call must be in the + // code cache. + // [ including possible extensions of the code cache, like AOT code ] + // + // To reduce regression risk, scratch still defaults to noreg on + // arm32. If a register can be clobbered, it should be explicitly + // specified to allow future optimizations. 
+ void patchable_jump(address target, + relocInfo::relocType rtype = relocInfo::runtime_call_type, + Register scratch = AARCH64_ONLY(Rtemp) NOT_AARCH64(noreg) +#ifndef AARCH64 + , AsmCondition cond = al +#endif + ); + + // patchable_call may scratch Rtemp + int patchable_call(address target, + RelocationHolder const& rspec, + bool c2 = false); + + int patchable_call(address target, + relocInfo::relocType rtype, + bool c2 = false) { + return patchable_call(target, Relocation::spec_simple(rtype), c2); + } + +#if defined(AARCH64) && defined(COMPILER2) + static int call_size(address target, bool far, bool patchable); +#endif + +#ifdef AARCH64 + static bool page_reachable_from_cache(address target); +#endif + static bool _reachable_from_cache(address target); + static bool _cache_fully_reachable(); + bool cache_fully_reachable(); + bool reachable_from_cache(address target); + + void zero_extend(Register rd, Register rn, int bits); + void sign_extend(Register rd, Register rn, int bits); + + inline void zap_high_non_significant_bits(Register r) { +#ifdef AARCH64 + if(ZapHighNonSignificantBits) { + movk(r, 0xBAAD, 48); + movk(r, 0xF00D, 32); + } +#endif + } + +#ifndef AARCH64 + void long_move(Register rd_lo, Register rd_hi, + Register rn_lo, Register rn_hi, + AsmCondition cond = al); + void long_shift(Register rd_lo, Register rd_hi, + Register rn_lo, Register rn_hi, + AsmShift shift, Register count); + void long_shift(Register rd_lo, Register rd_hi, + Register rn_lo, Register rn_hi, + AsmShift shift, int count); + + void atomic_cas(Register tmpreg1, Register tmpreg2, Register oldval, Register newval, Register base, int offset); + void atomic_cas_bool(Register oldval, Register newval, Register base, int offset, Register tmpreg); + void atomic_cas64(Register temp_lo, Register temp_hi, Register temp_result, Register oldval_lo, Register oldval_hi, Register newval_lo, Register newval_hi, Register base, int offset); +#endif // !AARCH64 + + void cas_for_lock_acquire(Register 
oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); + void cas_for_lock_release(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); + +#ifndef PRODUCT + // Preserves flags and all registers. + // On SMP the updated value might not be visible to external observers without a synchronization barrier + void cond_atomic_inc32(AsmCondition cond, int* counter_addr); +#endif // !PRODUCT + + // unconditional non-atomic increment + void inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2); + void inc_counter(int* counter_addr, Register tmpreg1, Register tmpreg2) { + inc_counter((address) counter_addr, tmpreg1, tmpreg2); + } + + void pd_patch_instruction(address branch, address target); + + // Loading and storing values by size and signed-ness; + // size must not exceed wordSize (i.e. 8-byte values are not supported on 32-bit ARM); + // each of these calls generates exactly one load or store instruction, + // so src can be a pre- or post-indexed address. +#ifdef AARCH64 + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); + void store_sized_value(Register src, Address dst, size_t size_in_bytes); +#else + // 32-bit ARM variants also support conditional execution + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, AsmCondition cond = al); + void store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond = al); +#endif + + void lookup_interface_method(Register recv_klass, + Register intf_klass, + Register itable_index, + Register method_result, + Register temp_reg1, + Register temp_reg2, + Label& L_no_such_interface); + + // Compare char[] arrays aligned to 4 bytes. 
+ void char_arrays_equals(Register ary1, Register ary2, + Register limit, Register result, + Register chr1, Register chr2, Label& Ldone); + + + void floating_cmp(Register dst); + + // improved x86 portability (minimizing source code changes) + + void ldr_literal(Register rd, AddressLiteral addr) { + relocate(addr.rspec()); +#ifdef AARCH64 + ldr(rd, addr.target()); +#else + ldr(rd, Address(PC, addr.target() - pc() - 8)); +#endif + } + + void lea(Register Rd, AddressLiteral addr) { + // Never dereferenced, as on x86 (lval status ignored) + mov_address(Rd, addr.target(), addr.rspec()); + } + + void restore_default_fp_mode(); + +#ifdef COMPILER2 +#ifdef AARCH64 + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. + void fast_lock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3); + void fast_unlock(Register obj, Register box, Register scratch, Register scratch2, Register scratch3); +#else + void fast_lock(Register obj, Register box, Register scratch, Register scratch2); + void fast_unlock(Register obj, Register box, Register scratch, Register scratch2); +#endif +#endif + +#ifdef AARCH64 + +#define F(mnemonic) \ + void mnemonic(Register rt, address target) { \ + Assembler::mnemonic(rt, target); \ + } \ + void mnemonic(Register rt, Label& L) { \ + Assembler::mnemonic(rt, target(L)); \ + } + + F(cbz_w); + F(cbnz_w); + F(cbz); + F(cbnz); + +#undef F + +#define F(mnemonic) \ + void mnemonic(Register rt, int bit, address target) { \ + Assembler::mnemonic(rt, bit, target); \ + } \ + void mnemonic(Register rt, int bit, Label& L) { \ + Assembler::mnemonic(rt, bit, target(L)); \ + } + + F(tbz); + F(tbnz); +#undef F + +#endif // AARCH64 + +}; + + +// The purpose of this class is to build several code fragments of the same size +// in order to allow fast table branch. 
+ +class FixedSizeCodeBlock VALUE_OBJ_CLASS_SPEC { +public: + FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled); + ~FixedSizeCodeBlock(); + +private: + MacroAssembler* _masm; + address _start; + int _size_in_instrs; + bool _enabled; +}; + + +#endif // CPU_ARM_VM_MACROASSEMBLER_ARM_HPP + --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/macroAssembler_arm.inline.hpp 2016-12-02 11:22:01.110174630 -0500 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2008, 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP +#define CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" +#include "runtime/handles.inline.hpp" + +inline void MacroAssembler::pd_patch_instruction(address branch, address target) { + int instr = *(int*)branch; + int new_offset = (int)(target - branch NOT_AARCH64(- 8)); + assert((new_offset & 3) == 0, "bad alignment"); + +#ifdef AARCH64 + if ((instr & (0x1f << 26)) == (0b00101 << 26)) { + // Unconditional B or BL + assert (is_offset_in_range(new_offset, 26), "offset is too large"); + *(int*)branch = (instr & ~right_n_bits(26)) | encode_offset(new_offset, 26, 0); + } else if ((instr & (0xff << 24)) == (0b01010100 << 24) && (instr & (1 << 4)) == 0) { + // Conditional B + assert (is_offset_in_range(new_offset, 19), "offset is too large"); + *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); + } else if ((instr & (0b111111 << 25)) == (0b011010 << 25)) { + // Compare & branch CBZ/CBNZ + assert (is_offset_in_range(new_offset, 19), "offset is too large"); + *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); + } else if ((instr & (0b111111 << 25)) == (0b011011 << 25)) { + // Test & branch TBZ/TBNZ + assert (is_offset_in_range(new_offset, 14), "offset is too large"); + *(int*)branch = (instr & ~(right_n_bits(14) << 5)) | encode_offset(new_offset, 14, 5); + } else if ((instr & (0b111011 << 24)) == (0b011000 << 24)) { + // LDR (literal) + unsigned opc = ((unsigned)instr >> 30); + assert (opc != 0b01 || ((uintx)target & 7) == 0, "ldr target should be aligned"); + assert (is_offset_in_range(new_offset, 19), "offset is too large"); + *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_offset(new_offset, 19, 5); + } else if (((instr & (1 << 31)) == 0) && ((instr & (0b11111 << 24)) == (0b10000 << 24))) { + // ADR + assert (is_imm_in_range(new_offset, 
21, 0), "offset is too large"); + instr = (instr & ~(right_n_bits(2) << 29)) | (new_offset & 3) << 29; + *(int*)branch = (instr & ~(right_n_bits(19) << 5)) | encode_imm(new_offset >> 2, 19, 0, 5); + } else if((unsigned int)instr == address_placeholder_instruction) { + // address + assert (*(unsigned int *)(branch + InstructionSize) == address_placeholder_instruction, "address placeholder occupies two instructions"); + *(intx*)branch = (intx)target; + } else { + ::tty->print_cr("=============== instruction: 0x%x ================\n", instr); + Unimplemented(); // TODO-AARCH64 + } +#else + if ((instr & 0x0e000000) == 0x0a000000) { + // B or BL instruction + assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint"); + *(int*)branch = (instr & 0xff000000) | ((unsigned int)new_offset << 6 >> 8); + } else if((unsigned int)instr == address_placeholder_instruction) { + // address + *(int*)branch = (int)target; + } else if ((instr & 0x0fff0000) == 0x028f0000 || ((instr & 0x0fff0000) == 0x024f0000)) { + // ADR + int encoding = 0x8 << 20; // ADD + if (new_offset < 0) { + encoding = 0x4 << 20; // SUB + new_offset = -new_offset; + } + AsmOperand o(new_offset); + *(int*)branch = (instr & 0xff0ff000) | encoding | o.encoding(); + } else { + // LDR Rd, [PC, offset] instruction + assert((instr & 0x0f7f0000) == 0x051f0000, "Must be ldr_literal"); + assert(new_offset < 4096 && new_offset > -4096, "encoding constraint"); + if (new_offset >= 0) { + *(int*)branch = (instr & 0xff0ff000) | 9 << 20 | new_offset; + } else { + *(int*)branch = (instr & 0xff0ff000) | 1 << 20 | -new_offset; + } + } +#endif // AARCH64 +} + +#endif // CPU_ARM_VM_MACROASSEMBLER_ARM_INLINE_HPP --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/metaspaceShared_arm.cpp 2016-12-02 11:22:06.350471801 -0500 @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "assembler_arm.inline.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. +// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no relationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. 
+ +#define __ masm-> + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + + for (int i = 0; i < vtbl_list_size; ++i) { + Label common_code; + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*) __ pc(); + __ mov(Rtemp, j); // Rtemp contains an index of a virtual method in the table + __ b(common_code); + } + + InlinedAddress vtable_address((address)&vtbl_list[i]); + __ bind(common_code); + const Register tmp2 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R4); + assert_different_registers(Rtemp, tmp2); +#ifndef AARCH64 + __ push(tmp2); +#endif // !AARCH64 + // Do not use ldr_global since the code must be portable across all ARM architectures + __ ldr_literal(tmp2, vtable_address); + __ ldr(tmp2, Address(tmp2)); // get correct vtable address + __ ldr(Rtemp, Address::indexed_ptr(tmp2, Rtemp)); // get real method pointer + __ str(tmp2, Address(R0)); // update vtable. R0 = "this" +#ifndef AARCH64 + __ pop(tmp2); +#endif // !AARCH64 + __ jump(Rtemp); + __ bind_literal(vtable_address); + } + + __ flush(); + *mc_top = (char*) __ pc(); +} --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/methodHandles_arm.cpp 2016-12-02 11:22:11.362756039 -0500 @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// This file mirrors methodHandles_x86.cpp as much as possible to ease +// cross-platform development for JSR292. 
+// Last synchronization: changeset f8c9417e3571 + +#include "precompiled.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp1, Register temp2) { + if (VerifyMethodHandles) { + verify_klass(_masm, klass_reg, temp1, temp2, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + } + __ ldr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, Register temp1, Register temp2, SystemDictionary::WKID klass_id, + const char* error_message) { + InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + KlassHandle klass = SystemDictionary::well_known_klass(klass_id); + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj); + __ cbz(obj, L_bad); + __ load_klass(temp1, obj); + __ lea(temp2, ExternalAddress((address) klass_addr)); + __ ldr(temp2, temp2); // the cmpptr on x86 dereferences the AddressLiteral (not lea) + __ cmp(temp1, temp2); + __ b(L_ok, eq); + intptr_t super_check_offset = klass->super_check_offset(); + __ ldr(temp1, Address(temp1, super_check_offset)); + __ cmp(temp1, temp2); + __ b(L_ok, eq); + + __ bind(L_bad); + __ stop(error_message); + __ 
BIND(L_ok); + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ ldr_u32(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ logical_shift_right(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ andr(temp, temp, (unsigned)java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ cmp(temp, ref_kind); + __ b(L, eq); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ stop(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, bool for_compiler_entry) { + Label L_no_such_method; + __ cbz(Rmethod, L_no_such_method); + + // Note: JVMTI overhead seems small enough compared to invocation + // cost and is not worth the complexity or code size overhead of + // supporting several variants of each adapter. + if (!for_compiler_entry && (JvmtiExport::can_post_interpreter_events())) { + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + __ ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); +#ifdef AARCH64 + Label L; + __ cbz(Rtemp, L); + __ indirect_jump(Address(Rmethod, Method::interpreter_entry_offset()), Rtemp); + __ bind(L); +#else + __ cmp(Rtemp, 0); + __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()), ne); +#endif // AARCH64 + } + const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_offset() : + Method::from_interpreted_offset(); + + __ indirect_jump(Address(Rmethod, entry_offset), Rtemp); + + __ bind(L_no_such_method); + // throw exception + __ jump(StubRoutines::throw_AbstractMethodError_entry(), relocInfo::runtime_call_type, Rtemp); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register tmp, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, tmp, Rmethod); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ load_heap_oop(tmp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(tmp); + + __ load_heap_oop(tmp, Address(tmp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(tmp); + + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ldr(Rmethod, Address(tmp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ldr(tmp, Address(Rmethod, Method::const_offset())); + __ load_sized_value(tmp, + Address(tmp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + __ ldr(tmp, __ receiver_argument_address(Rparams, tmp, tmp)); + __ cmp(tmp, recv); + __ b(L, eq); + __ stop("receiver not on stack"); + __ bind(L); + } + + jump_from_method_handle(_masm, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), 
"expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the user-visible names, and linkToCallSite, are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all require an extra argument. + __ should_not_reach_here(); // empty stubs make SG sick + return NULL; + } + + // Rmethod: Method* + // Rparams (SP on 32-bit ARM): pointer to parameters + // Rsender_sp (R4/R19): sender SP (must preserve; see prepare_to_jump_from_interpreted) + // R5_mh: receiver method handle (must load from sp[MethodTypeForm.vmslots]) + // R1, R2, Rtemp: garbage temp, blown away + + // Use same name as x86 to ease future merges + Register rdx_temp = R2_tmp; + Register rdx_param_size = rdx_temp; // size of parameters + Register rax_temp = R1_tmp; + Register rcx_mh = R5_mh; // MH receiver; dies quickly and is recycled + Register rbx_method = Rmethod; // eventual target of this invocation + Register rdi_temp = Rtemp; + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ldrh(rdi_temp, Address(rbx_method, Method::intrinsic_id_offset_in_bytes())); + __ sub_slow(rdi_temp, rdi_temp, (int) iid); + __ cbz(rdi_temp, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ stop("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
+ Address rdx_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ldr(rdx_param_size, Address(rbx_method, Method::const_offset())); + __ load_sized_value(rdx_param_size, + Address(rdx_param_size, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + rdx_first_arg_addr = __ receiver_argument_address(Rparams, rdx_param_size, rdi_temp); + } else { + DEBUG_ONLY(rdx_param_size = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ldr(rcx_mh, rdx_first_arg_addr); + DEBUG_ONLY(rdx_param_size = noreg); + } + + // rdx_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register rcx_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
+ __ ldr(rcx_recv = rcx_mh, rdx_first_arg_addr); + DEBUG_ONLY(rdx_param_size = noreg); + } + Register rbx_member = rbx_method; // MemberName ptr; incoming method ptr is dead now +#ifdef AARCH64 + __ ldr(rbx_member, Address(Rparams, Interpreter::stackElementSize, post_indexed)); +#else + __ pop(rbx_member); +#endif + generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry); + } + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // Use same name as x86 to ease future merges + Register rbx_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = (for_compiler_entry ? saved_last_sp_register() : R1_tmp); + Register temp2 = AARCH64_ONLY(R9) NOT_AARCH64(R8); + Register temp3 = Rtemp; // R12/R16 + Register temp4 = AARCH64_ONLY(Rtemp2) NOT_AARCH64(R5); + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); +#ifdef AARCH64 + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); +#else + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + assert_different_registers(temp4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); +#endif // AARCH64 + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, temp4, member_reg); + if (!for_compiler_entry) + assert_different_registers(temp1, temp2, temp3, temp4, saved_last_sp_register()); // don't trash lastSP + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.exactInvoker.vmtarget + jump_to_lambda_form(_masm, receiver_reg, temp3, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, temp2, temp3, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget(member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg, temp3); + } else { + // load receiver klass itself + __ null_check(receiver_reg, temp3, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc, temp3, temp4); + __ verify_klass_ptr(temp2_defc); +#ifdef AARCH64 + // TODO-AARCH64 + __ b(L_ok); +#else + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, noreg, L_ok); +#endif + // If we get here, the type check failed! 
+ __ stop("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the extra argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ldr(Rmethod, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ldr(Rmethod, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ldr(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmp(temp2_index, 0); + __ b(L_index_ok, ge); + __ stop("no virtual index"); + __ bind(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
+ + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, Rmethod); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf, temp2, temp4); + __ verify_klass_ptr(temp3_intf); + + Register rbx_index = rbx_method; + __ ldr(rbx_index, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ cmp(rbx_index, 0); + __ b(L, ge); + __ stop("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + Label L_no_such_interface; + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rbx_index, rbx_method, + temp2, temp4, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + + // Live at this point: + // Rmethod (target method) + // Rsender_sp, Rparams (if interpreted) + // register arguments (if compiled) + + // After figuring out which concrete method to call, jump into it. 
+ __ verify_method_ptr(Rmethod); + jump_from_method_handle(_masm, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ jump(StubRoutines::throw_IncompatibleClassChangeError_entry(), relocInfo::runtime_call_type, Rtemp); + } + } +} + + +#ifndef PRODUCT +enum { + ARG_LIMIT = 255, SLOP = 4, + // use this parameter for checking for garbage stack movements: + UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP) + // the slop defends against false alarms due to fencepost errors +}; + +#ifdef AARCH64 +const int trace_mh_nregs = 32; // R0-R30, PC +#else +const int trace_mh_nregs = 15; +const Register trace_mh_regs[trace_mh_nregs] = + {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; +#endif // AARCH64 + +void trace_method_handle_stub(const char* adaptername, + intptr_t* saved_regs, + intptr_t* saved_bp, + oop mh) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + intptr_t* entry_sp = (intptr_t*) &saved_regs[trace_mh_nregs]; // just after the saved regs + intptr_t* saved_sp = (intptr_t*) saved_regs[Rsender_sp->encoding()]; // save of Rsender_sp + intptr_t* last_sp = (intptr_t*) saved_bp[AARCH64_ONLY(frame::interpreter_frame_stack_top_offset) NOT_AARCH64(frame::interpreter_frame_last_sp_offset)]; + intptr_t* base_sp = last_sp; + + intptr_t mh_reg = (intptr_t)saved_regs[R5_mh->encoding()]; + const char* mh_reg_name = "R5_mh"; + if (!has_mh) mh_reg_name = "R5"; + tty->print_cr("MH %s %s=" PTR_FORMAT " sp=(" PTR_FORMAT "+" INTX_FORMAT ") stack_size=" INTX_FORMAT " bp=" PTR_FORMAT, + adaptername, mh_reg_name, mh_reg, + (intptr_t)entry_sp, (intptr_t)saved_sp - (intptr_t)entry_sp, (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp); + + if (last_sp != saved_sp && last_sp != NULL) + tty->print_cr("*** last_sp=" INTPTR_FORMAT, p2i(last_sp)); + if (Verbose) { + 
tty->print(" reg dump: "); + int i; + for (i = 0; i < trace_mh_nregs; i++) { + if (i > 0 && i % AARCH64_ONLY(2) NOT_AARCH64(4) == 0) + tty->print("\n + dump: "); +#ifdef AARCH64 + const char* reg_name = (i == trace_mh_nregs-1) ? "pc" : as_Register(i)->name(); +#else + const char* reg_name = trace_mh_regs[i]->name(); +#endif + tty->print(" %s: " INTPTR_FORMAT, reg_name, p2i((void *)saved_regs[i])); + } + tty->cr(); + } + + if (Verbose) { + // dump last frame (from JavaThread::print_frame_layout) + + // Note: code is robust but the dumped informationm may not be + // 100% correct, particularly with respect to the dumped + // "unextended_sp". Getting it right for all trace_method_handle + // call paths is not worth the complexity/risk. The correct slot + // will be identified by *Rsender_sp anyway in the dump. + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; + FrameValues values; + + intptr_t* dump_fp = (intptr_t *) saved_bp; + address dump_pc = (address) saved_regs[trace_mh_nregs-2]; // LR (with LR,PC last in saved_regs) + frame dump_frame((intptr_t *)entry_sp, dump_fp, dump_pc); + + dump_frame.describe(values, 1); + // mark Rsender_sp if seems valid + if (has_mh) { + if ((saved_sp >= entry_sp - UNREASONABLE_STACK_MOVE) && (saved_sp < dump_fp)) { + values.describe(-1, saved_sp, "*Rsender_sp"); + } + } + + // Note: the unextended_sp may not be correct + tty->print_cr(" stack layout:"); + values.print(p); + } + if (Verbose) { + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { + if (!TraceMethodHandles) return; + BLOCK_COMMENT("trace_method_handle {"); + // register saving + // must correspond to trace_mh_nregs and trace_mh_regs defined above + int push_size 
= __ save_all_registers(); + assert(trace_mh_nregs*wordSize == push_size,"saved register count mismatch"); + + __ mov_slow(R0, adaptername); + __ mov(R1, SP); // entry_sp (after pushes) + __ mov(R2, FP); + if (R5_mh != R3) { + assert_different_registers(R0, R1, R2, R5_mh); + __ mov(R3, R5_mh); + } + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), R0, R1, R2, R3); + + __ restore_all_registers(); + BLOCK_COMMENT("} trace_method_handle"); +} +#endif //PRODUCT --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/methodHandles_arm.hpp 2016-12-02 11:22:16.723060015 -0500 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. 

// Adapters
// Size budget for the generated method-handle adapter code. The NOT_PRODUCT
// term adds 30000 to the base 18000 (presumably only in non-product builds --
// confirm against the NOT_PRODUCT macro definition).
enum /* platform_dependent_constants */ {
  adapter_code_size = 18000 NOT_PRODUCT(+ 30000)
};

// Additional helper methods for MethodHandles code generation:
public:
  // Used by the dispatch code to turn a loaded MemberName.clazz mirror in
  // klass_reg into a klass pointer; temp1/temp2 are scratch registers.
  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp1, Register temp2);

  // Verification helper: called under VerifyMethodHandles to check that obj is
  // an instance of the well-known klass klass_id, reporting error_message
  // otherwise. NOTE(review): NOT_DEBUG_RETURN presumably supplies an empty body
  // outside debug builds -- confirm against the macro definition.
  static void verify_klass(MacroAssembler* _masm,
                           Register obj, Register temp1, Register temp2, SystemDictionary::WKID klass_id,
                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;

  // Verification helper: called under VerifyMethodHandles with the expected
  // JVM_REF_* kind for the MemberName held in member_reg.
  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;

  // Similar to InterpreterMacroAssembler::jump_from_interpreted.
  // Takes care of special dispatch from single stepping too.
  // Rmethod should contain target Method*.
  static void jump_from_method_handle(MacroAssembler* _masm, bool for_compiler_entry);

  // Loads the invoker through MH.form -> LF.vmentry -> vmtarget into Rmethod
  // (using tmp as scratch) and then jumps via jump_from_method_handle.
  static void jump_to_lambda_form(MacroAssembler* _masm,
                                  Register recv, Register tmp,
                                  bool for_compiler_entry);

  // Register that holds the saved last SP (Rsender_sp). The dispatch code uses
  // it as a temp for compiled entries and keeps it untouched for interpreted ones.
  static Register saved_last_sp_register() {
    // Should be in sharedRuntime, not here.
    return Rsender_sp;
  }
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/nativeInst_arm.hpp 2016-12-02 11:22:22.051362178 -0500
@@ -0,0 +1,41 @@
/*
 * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_ARM_VM_NATIVEINST_ARM_HPP
#define CPU_ARM_VM_NATIVEINST_ARM_HPP

#include "asm/macroAssembler.hpp"
#include "memory/allocation.hpp"
#include "runtime/icache.hpp"
#include "runtime/os.hpp"


// Select the word-size specific implementation of the native instruction classes.
#ifdef AARCH64
#include "nativeInst_arm_64.hpp"
#else
#include "nativeInst_arm_32.hpp"
#endif


#endif // CPU_ARM_VM_NATIVEINST_ARM_HPP
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/nativeInst_arm_32.cpp 2016-12-02 11:22:27.323661161 -0500
@@ -0,0 +1,339 @@
/*
 * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
+ * + */ + +#include "precompiled.hpp" +#include "assembler_arm.inline.hpp" +#include "code/codeCache.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_arm.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#include "code/icBuffer.hpp" + +int NativeMovRegMem::offset() const { + switch (kind()) { + case instr_ldr_str: + return encoding() & 0xfff; + case instr_ldrh_strh: + return (encoding() & 0x0f) | ((encoding() >> 4) & 0xf0); + case instr_fld_fst: + return (encoding() & 0xff) << 2; + default: + ShouldNotReachHere(); + return 0; + } +} + +void NativeMovRegMem::set_offset(int x) { + assert(x >= 0 && x < 65536, "encoding constraint"); + const int Rt = Rtemp->encoding(); + + // If offset is too large to be placed into single ldr/str instruction, we replace + // ldr Rd, [Rn, #offset] + // nop + // with + // add Rtemp, Rn, #offset_hi + // ldr Rd, [Rtemp, #offset_lo] + switch (kind()) { + case instr_ldr_str: + if (x < 4096) { + set_encoding((encoding() & 0xfffff000) | x); + } else { + NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address()); + assert(next->is_nop(), "must be"); + next->set_encoding((encoding() & 0xfff0f000) | Rt << 16 | (x & 0xfff)); + this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 12 | 0xe2800a00); + } + break; + case instr_ldrh_strh: + if (x < 256) { + set_encoding((encoding() & 0xfffff0f0) | (x & 0x0f) | (x & 0xf0) << 4); + } else { + NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address()); + assert(next->is_nop(), "must be"); + next->set_encoding((encoding() & 0xfff0f0f0) | Rt << 16 | (x & 0x0f) | (x & 0xf0) << 4); + this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 8 | 0xe2800c00); + } + break; + case instr_fld_fst: + if (x < 1024) { + set_encoding((encoding() & 0xffffff00) | (x >> 
2)); + } else { + NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address()); + assert(next->is_nop(), "must be"); + next->set_encoding((encoding() & 0xfff0ff00) | Rt << 16 | ((x >> 2) & 0xff)); + this->set_encoding((encoding() & 0x000f0000) | Rt << 12 | x >> 10 | 0xe2800b00); + } + break; + default: + ShouldNotReachHere(); + } +} + +intptr_t NativeMovConstReg::data() const { + RawNativeInstruction* next = next_raw(); + if (is_movw()) { + // Oop embedded in movw/movt instructions + assert(VM_Version::supports_movw(), "must be"); + return (this->encoding() & 0x00000fff) | (this->encoding() & 0x000f0000) >> 4 | + (next->encoding() & 0x00000fff) << 16 | (next->encoding() & 0x000f0000) << 12; + } else { + // Oop is loaded from oops section or inlined in the code + int oop_offset; + if (is_ldr_literal()) { + // ldr Rd, [PC, #offset] + oop_offset = ldr_offset(); + } else { + assert(next->is_ldr(), "must be"); + oop_offset = (this->encoding() & 0xff) << 12 | (next->encoding() & 0xfff); + if (is_add_pc()) { + // add Rd, PC, #offset_hi + // ldr Rd, [Rd, #offset_lo] + assert(next->encoding() & (1 << 23), "sign mismatch"); + // offset OK (both positive) + } else { + assert(is_sub_pc(), "must be"); + // sub Rd, PC, #offset_hi + // ldr Rd, [Rd, -#offset_lo] + assert(!(next->encoding() & (1 << 23)), "sign mismatch"); + // negative offsets + oop_offset = -oop_offset; + } + } + return *(int*)(instruction_address() + 8 + oop_offset); + } +} + +void NativeMovConstReg::set_data(intptr_t x, address pc) { + // Find and replace the oop corresponding to this instruction in oops section + RawNativeInstruction* next = next_raw(); + oop* oop_addr = NULL; + Metadata** metadata_addr = NULL; + CodeBlob* cb = CodeCache::find_blob(instruction_address()); + if (cb != NULL) { + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + RelocIterator iter(nm, instruction_address(), next->instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) 
{ + oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(x); + break; + } else if (iter.type() == relocInfo::metadata_type) { + metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)x; + break; + } + } + } + } + + if (is_movw()) { + // data embedded in movw/movt instructions + assert(VM_Version::supports_movw(), "must be"); + unsigned int lo = (unsigned int)x; + unsigned int hi = (unsigned int)(x >> 16); + this->set_encoding((this->encoding() & 0xfff0f000) | (lo & 0xf000) << 4 | (lo & 0xfff)); + next->set_encoding((next->encoding() & 0xfff0f000) | (hi & 0xf000) << 4 | (hi & 0xfff)); + } else if (oop_addr == NULL & metadata_addr == NULL) { + // A static ldr_literal (without oop or metadata relocation) + assert(is_ldr_literal(), "must be"); + int offset = ldr_offset(); + oop_addr = (oop*)(instruction_address() + 8 + offset); + *oop_addr = cast_to_oop(x); + } else { + // data is loaded from oop or metadata section + int offset; + + address addr = oop_addr != NULL ? (address)oop_addr : (address)metadata_addr; + + if(pc == 0) { + offset = addr - instruction_address() - 8; + } else { + offset = addr - pc - 8; + } + + int sign = (offset >= 0) ? (1 << 23) : 0; + int delta = (offset >= 0) ? 
offset : (-offset); + assert(delta < 0x100000, "within accessible range"); + if (is_ldr_literal()) { + // fix the ldr with the real offset to the oop/metadata table + assert(next->is_nop(), "must be"); + if (delta < 4096) { + // ldr Rd, [PC, #offset] + set_encoding((encoding() & 0xff7ff000) | delta | sign); + assert(ldr_offset() == offset, "check encoding"); + } else { + int cc = encoding() & 0xf0000000; + int Rd = (encoding() >> 12) & 0xf; + int Rt = Rd; + assert(Rt != 0xf, "Illegal destination register"); // or fix by using Rtemp + // move the ldr, fixing delta_lo and the source register + next->set_encoding((encoding() & 0xff70f000) | (Rt << 16) | (delta & 0xfff) | sign); + assert(next->is_ldr(), "must be"); + if (offset > 0) { + // add Rt, PC, #delta_hi + // ldr Rd, [Rt, #delta_lo] + this->set_encoding((Rt << 12) | (delta >> 12) | 0x028f0a00 | cc); + assert(is_add_pc(), "must be"); + } else { + // sub Rt, PC, #delta_hi + // ldr Rd, [Rt, -#delta_lo] + this->set_encoding((Rt << 12) | (delta >> 12) | 0x024f0a00 | cc); + assert(is_sub_pc(), "must be"); + } + } + } else { + assert(is_pc_rel(), "must be"); + assert(next->is_ldr(), "must be"); + if (offset > 0) { + // add Rt, PC, #delta_hi + this->set_encoding((this->encoding() & 0xf00ff000) | 0x02800a00 | (delta >> 12)); + assert(is_add_pc(), "must be"); + } else { + // sub Rt, PC, #delta_hi + this->set_encoding((this->encoding() & 0xf00ff000) | 0x02400a00 | (delta >> 12)); + assert(is_sub_pc(), "must be"); + } + // ldr Rd, Rt, #delta_lo (or -#delta_lo) + next->set_encoding((next->encoding() & 0xff7ff000) | (delta & 0xfff) | sign); + } + } +} + +void NativeMovConstReg::set_pc_relative_offset(address addr, address pc) { + int offset; + if (pc == 0) { + offset = addr - instruction_address() - 8; + } else { + offset = addr - pc - 8; + } + + RawNativeInstruction* next = next_raw(); + + int sign = (offset >= 0) ? (1 << 23) : 0; + int delta = (offset >= 0) ? 
offset : (-offset); + assert(delta < 0x100000, "within accessible range"); + if (is_ldr_literal()) { + if (delta < 4096) { + // ldr Rd, [PC, #offset] + set_encoding((encoding() & 0xff7ff000) | delta | sign); + assert(ldr_offset() == offset, "check encoding"); + } else { + assert(next->is_nop(), "must be"); + int cc = encoding() & 0xf0000000; + int Rd = (encoding() >> 12) & 0xf; + int Rt = Rd; + assert(Rt != 0xf, "Illegal destination register"); // or fix by using Rtemp + // move the ldr, fixing delta_lo and the source register + next->set_encoding((encoding() & 0xff70f000) | (Rt << 16) | (delta & 0xfff) | sign); + assert(next->is_ldr(), "must be"); + if (offset > 0) { + // add Rt, PC, #delta_hi + // ldr Rd, [Rt, #delta_lo] + this->set_encoding((Rt << 12) | (delta >> 12) | 0x028f0a00 | cc); + assert(is_add_pc(), "must be"); + } else { + // sub Rt, PC, #delta_hi + // ldr Rd, [Rt, -#delta_lo] + this->set_encoding((Rt << 12) | (delta >> 12) | 0x024f0a00 | cc); + assert(is_sub_pc(), "must be"); + } + } + } else { + assert(is_pc_rel(), "must be"); + assert(next->is_ldr(), "must be"); + if (offset > 0) { + // add Rt, PC, #delta_hi + this->set_encoding((this->encoding() & 0xf00ff000) | 0x02800a00 | (delta >> 12)); + assert(is_add_pc(), "must be"); + } else { + // sub Rt, PC, #delta_hi + this->set_encoding((this->encoding() & 0xf00ff000) | 0x02400a00 | (delta >> 12)); + assert(is_sub_pc(), "must be"); + } + // ldr Rd, Rt, #delta_lo (or -#delta_lo) + next->set_encoding((next->encoding() & 0xff7ff000) | (delta & 0xfff) | sign); + } +} + +void RawNativeJump::check_verified_entry_alignment(address entry, address verified_entry) { +} + +void RawNativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be"); + int *a = (int *)verified_entry; + a[0] = zombie_illegal_instruction; // always illegal + ICache::invalidate_range((address)&a[0], sizeof a[0]); +} + +void 
NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + int offset = (int)(entry - code_pos - 8); + assert(offset < 0x2000000 && offset > -0x2000000, "encoding constraint"); + nativeInstruction_at(code_pos)->set_encoding(0xea000000 | ((unsigned int)offset << 6 >> 8)); +} + +static address raw_call_for(address return_address) { + CodeBlob* cb = CodeCache::find_blob(return_address); + nmethod* nm = cb->as_nmethod_or_null(); + if (nm == NULL) { + ShouldNotReachHere(); + return NULL; + } + // Look back 4 instructions, to allow for ic_call + address begin = MAX2(return_address - 4*NativeInstruction::instruction_size, nm->code_begin()); + RelocIterator iter(nm, begin, return_address); + while (iter.next()) { + Relocation* reloc = iter.reloc(); + if (reloc->is_call()) { + address call = reloc->addr(); + if (nativeInstruction_at(call)->is_call()) { + if (nativeCall_at(call)->return_address() == return_address) { + return call; + } + } else { + // Some "calls" are really jumps + assert(nativeInstruction_at(call)->is_jump(), "must be call or jump"); + } + } + } + return NULL; +} + +bool RawNativeCall::is_call_before(address return_address) { + return (raw_call_for(return_address) != NULL); +} + +NativeCall* rawNativeCall_before(address return_address) { + address call = raw_call_for(return_address); + assert(call != NULL, "must be"); + return nativeCall_at(call); +} + --- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/hotspot/src/cpu/arm/vm/nativeInst_arm_32.hpp 2016-12-02 11:22:32.923978750 -0500 @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_ARM_VM_NATIVEINST_ARM_32_HPP +#define CPU_ARM_VM_NATIVEINST_ARM_32_HPP + +#include "asm/macroAssembler.hpp" +#include "code/codeCache.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "runtime/thread.hpp" +#include "register_arm.hpp" + +// ------------------------------------------------------------------- + +// Some experimental projects extend the ARM back-end by implementing +// what the front-end usually assumes is a single native instruction +// with a sequence of instructions. +// +// The 'Raw' variants are the low level initial code (usually one +// instruction wide but some of them were already composed +// instructions). They should be used only by the back-end. +// +// The non-raw classes are the front-end entry point, hiding potential +// back-end extensions or the actual instructions size. 
+class NativeInstruction;
+
+// Low-level view of one 32-bit ARM instruction word in code memory.
+// Instances are never constructed: a code address is cast to this type
+// (see at()), so 'this' is the address of the instruction itself.
+class RawNativeInstruction VALUE_OBJ_CLASS_SPEC {
+ public:
+
+  enum ARM_specific {
+    instruction_size = Assembler::InstructionSize
+  };
+
+  // Values produced by kind(); see kind() for the encoding bits involved.
+  enum InstructionKind {
+    instr_ldr_str    = 0x50,
+    instr_ldrh_strh  = 0x10,
+    instr_fld_fst    = 0xd0
+  };
+
+  // illegal instruction used by NativeJump::patch_verified_entry
+  // permanently undefined (UDF): 0xe << 28 | 0b1111111 << 20 | 0b1111 << 4
+  static const int zombie_illegal_instruction = 0xe7f000f0;
+
+  // Decodes a 12-bit rotated immediate: the 8-bit value in bits 7..0 rotated
+  // right by twice the 4-bit field in bits 11..8.
+  //
+  // The rotation is done on unsigned values and the zero-rotation case is
+  // handled separately: shifting a 32-bit value by 32 (which the naive
+  // "x >> r | x << (32 - r)" form does when r == 0) is undefined behaviour,
+  // as is shifting a signed value into/past the sign bit.
+  static int decode_rotated_imm12(int encoding) {
+    const unsigned int base = (unsigned int)(encoding & 0xff);
+    const unsigned int right_rotation = (unsigned int)(encoding & 0xf00) >> 7;
+    if (right_rotation == 0) {
+      return (int)base;
+    }
+    const unsigned int left_rotation = 32 - right_rotation;
+    return (int)((base >> right_rotation) | (base << left_rotation));
+  }
+
+  // Address arithmetic relative to this instruction (offsets are in bytes).
+  address addr_at(int offset)        const { return (address)this + offset; }
+  address instruction_address()      const { return addr_at(0); }
+  address next_raw_instruction_address() const { return addr_at(instruction_size); }
+
+  static RawNativeInstruction* at(address address) {
+    return (RawNativeInstruction*)address;
+  }
+  // The instruction word immediately following this one.
+  RawNativeInstruction* next_raw() const {
+    return at(next_raw_instruction_address());
+  }
+
+ public:
+  // The raw 32-bit instruction word.
+  int encoding()                     const { return *(int*)this; }
+
+  // Overwrites the instruction word. The store and the instruction cache
+  // invalidation are skipped when the word already has the requested value.
+  void set_encoding(int value) {
+    if (encoding() == value) {
+      return;
+    }
+    *(int*)this = value;
+    ICache::invalidate_word((address)this);
+  }
+
+  // Classifies the instruction from bits 27..24 and bit 21 of the encoding
+  // (mask 0xf2 applied after shifting right by 20).
+  InstructionKind kind() const {
+    return (InstructionKind) ((encoding() >> 20) & 0xf2);
+  }
+
+  bool is_nop()            const { return encoding() == (int)0xe1a00000; }
+  bool is_b()              const { return (encoding() & 0x0f000000) == 0x0a000000; }
+  bool is_bx()             const { return (encoding() & 0x0ffffff0) == 0x012fff10; }
+  bool is_bl()             const { return (encoding() & 0x0f000000) == 0x0b000000; }
+  bool is_blx()            const { return (encoding() & 0x0ffffff0) == 0x012fff30; }
+  // Two-instruction call: "add LR, ..." followed by a jump (b or ldr PC).
+  bool is_fat_call()       const {
+    return (is_add_lr() && next_raw()->is_jump());
+  }
+  // Two-instruction call: "add LR, ..." followed by "ldr PC, [PC, #offset]".
+  bool is_ldr_call()       const {
+    return (is_add_lr() && next_raw()->is_ldr_pc());
+  }
+  bool is_jump()           const { return is_b() || is_ldr_pc(); }
+  bool is_call()           const { return is_bl() || is_fat_call(); }
+  bool is_branch()         const { return is_b() || is_bl(); }
+  bool is_far_branch()     const { return is_movw() || is_ldr_literal(); }
+  bool is_ldr_literal()    const {
+    // ldr Rx, [PC, #offset] for positive or negative offsets
+    return (encoding() & 0x0f7f0000) == 0x051f0000;
+  }
+  bool is_ldr()    const {
+    // ldr Rd, [Rn, #offset] for positive or negative offsets
+    return (encoding() & 0x0f700000) == 0x05100000;
+  }
+  // Signed byte offset of an "ldr Rd, [Rn, #offset]" instruction: the 12-bit
+  // magnitude in bits 11..0, negated when bit 23 is clear.
+  int ldr_offset() const {
+    assert(is_ldr(), "must be");
+    const int magnitude = encoding() & 0xfff;
+    const bool positive = (encoding() & (1 << 23)) != 0;
+    return positive ? magnitude : -magnitude;
+  }
+  // is_ldr_pc: ldr PC, PC, #offset
+  bool is_ldr_pc()         const { return (encoding() & 0x0f7ff000) == 0x051ff000; }
+  // is_setting_pc(): ldr PC, Rxx, #offset
+  bool is_setting_pc()         const { return (encoding() & 0x0f70f000) == 0x0510f000; }
+  bool is_add_lr()         const { return (encoding() & 0x0ffff000) == 0x028fe000; }
+  bool is_add_pc()         const { return (encoding() & 0x0fff0000) == 0x028f0000; }
+  bool is_sub_pc()         const { return (encoding() & 0x0fff0000) == 0x024f0000; }
+  bool is_pc_rel()         const { return is_add_pc() || is_sub_pc(); }
+  bool is_movw()           const { return (encoding() & 0x0ff00000) == 0x03000000; }
+  bool is_movt()           const { return (encoding() & 0x0ff00000) == 0x03400000; }
+  // c2 doesn't use fixed registers for safepoint poll address
+  bool is_safepoint_poll() const { return (encoding() & 0xfff0ffff) == 0xe590c000; }
+  // For unit tests
+  static void test() {}
+
+};
+
+// Free-function spelling of RawNativeInstruction::at().
+inline RawNativeInstruction* rawNativeInstruction_at(address address) {
+  return (RawNativeInstruction*)address;
+}
+
+// Base class exported to the front-end
+class NativeInstruction: public RawNativeInstruction {
+public:
+  static NativeInstruction* at(address address) {
+    return (NativeInstruction*)address;
+  }
+
+public:
+  // No need to consider indirections while parsing NativeInstruction
+  address next_instruction_address() const {
+    return next_raw_instruction_address();
+  }
+
+  // next() is no longer defined to avoid confusion.
+  //
+  // The front end and most classes except for those defined in nativeInst_arm
+  // or relocInfo_arm should only use next_instruction_address(), skipping
+  // over composed instruction and ignoring back-end extensions.
+  //
+  // The back-end can use next_raw() when it knows the instruction sequence
+  // and only wants to skip a single native instruction.
+};
+
+// Free-function spelling of NativeInstruction::at().
+inline NativeInstruction* nativeInstruction_at(address address) {
+  return (NativeInstruction*)address;
+}
+
+// -------------------------------------------------------------------
+// Raw b() or bl() instructions, not used by the front-end.
+class RawNativeBranch: public RawNativeInstruction {
+ public:
+
+  // Branch target plus 'adj' bytes. The target is this instruction's address
+  // + 8 + the sign-extended 24-bit offset field scaled by 4.
+  address destination(int adj = 0) const {
+    // Move the offset field to the top of the word as an unsigned value (a
+    // signed left shift could overflow, which is undefined), then shift back
+    // arithmetically by 6 to sign-extend it and multiply it by 4.
+    const int byte_offset = ((int)((unsigned int)encoding() << 8)) >> 6;
+    return instruction_address() + byte_offset + 8 + adj;
+  }
+
+  // Re-targets the branch at 'dest', keeping the top byte of the encoding
+  // (condition and opcode) unchanged.
+  void set_destination(address dest) {
+    int new_offset = (int)(dest - instruction_address() - 8);
+    assert(new_offset < 0x2000000 && new_offset > -0x2000000, "encoding constraint");
+    // Drop the two low (always zero) bits and keep 24 bits of the word offset.
+    const unsigned int offset_field = (unsigned int)new_offset << 6 >> 8;
+    set_encoding((encoding() & 0xff000000) | offset_field);
+  }
+};
+
+// Views 'address' as a raw branch; asserts that it holds a b or bl.
+inline RawNativeBranch* rawNativeBranch_at(address address) {
+  assert(rawNativeInstruction_at(address)->is_branch(), "must be");
+  return (RawNativeBranch*)address;
+}
+
+class NativeBranch: public RawNativeBranch {
+};
+
+inline NativeBranch* nativeBranch_at(address address) {
+  return (NativeBranch *) rawNativeBranch_at(address);
+}
+
+// -------------------------------------------------------------------
+// NativeGeneralJump is for patchable internal (near) jumps
+// It is used directly by the front-end and must be a single instruction wide
+// (to support patching to other kind of instructions).
+class NativeGeneralJump: public RawNativeInstruction {
+ public:
+
+  // Target of the branch instruction at this address.
+  address jump_destination() const {
+    return rawNativeBranch_at(instruction_address())->destination();
+  }
+
+  // Re-targets the branch instruction at this address.
+  void set_jump_destination(address dest) {
+    rawNativeBranch_at(instruction_address())->set_destination(dest);
+  }
+
+  static void insert_unconditional(address code_pos, address entry);
+
+  // Copies the instruction word found at 'code_buffer' over the one at
+  // 'instr_addr'. Both addresses must be 4-byte aligned.
+  static void replace_mt_safe(address instr_addr, address code_buffer) {
+    // intptr_t rather than int: a pointer-to-int cast does not compile where
+    // pointers are wider than int.
+    assert(((intptr_t)instr_addr & 3) == 0 && ((intptr_t)code_buffer & 3) == 0, "must be aligned");
+    // Writing a word is atomic on ARM, so no MT-safe tricks are needed
+    rawNativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer);
+  }
+};
+
+// Views 'address' as a NativeGeneralJump; asserts that it holds a jump.
+inline NativeGeneralJump* nativeGeneralJump_at(address address) {
+  assert(rawNativeInstruction_at(address)->is_jump(), "must be");
+  return (NativeGeneralJump*)address;
+}
+
+// -------------------------------------------------------------------
+// A jump that is either a single b instruction or an "ldr PC, [PC, #offset]"
+// loading its target from a literal word.
+class RawNativeJump: public NativeInstruction {
+ public:
+
+  // Returns the jump target. For the b form 'adj' is added to the decoded
+  // target; for the ldr form the target is read from the literal word at
+  // instruction address + 8 + ldr_offset() and 'adj' is not used.
+  address jump_destination(int adj = 0) const {
+    if (is_b()) {
+      address target = rawNativeBranch_at(instruction_address())->destination(adj);
+      // Jump destination -1 is encoded as a jump to self
+      return (target == instruction_address()) ? (address)-1 : target;
+    }
+    assert(is_ldr_pc(), "must be");
+    const int offset = this->ldr_offset();
+    return *(address*)(instruction_address() + 8 + offset);
+  }
+
+  // Sets the jump target: patches the branch offset for the b form, or
+  // stores 'dest' into the literal word for the ldr form.
+  void set_jump_destination(address dest) {
+    if (is_b()) {
+      // Jump destination -1 is encoded as a jump to self
+      if (dest == (address)-1) {
+        dest = instruction_address();
+      }
+      rawNativeBranch_at(instruction_address())->set_destination(dest);
+    } else {
+      assert(is_ldr_pc(), "must be");
+      const int offset = this->ldr_offset();
+      *(address*)(instruction_address() + 8 + offset) = dest;
+      OrderAccess::storeload(); // overkill if caller holds lock?
+    }
+  }
+
+  static void check_verified_entry_alignment(address entry, address verified_entry);
+
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+
+};
+
+// Views 'address' as a RawNativeJump; asserts that it holds a jump.
+inline RawNativeJump* rawNativeJump_at(address address) {
+  assert(rawNativeInstruction_at(address)->is_jump(), "must be");
+  return (RawNativeJump*)address;
+}
+
+// -------------------------------------------------------------------
+// A call that is either a single bl, or a "fat call": "add LR, ..." followed
+// by a jump instruction.
+class RawNativeCall: public NativeInstruction {
+  // See IC calls in LIR_Assembler::ic_call(): ARM v5/v6 doesn't use a
+  // single bl for IC calls.
+
+ public:
+
+  // Address execution resumes at after the call. For a bl this is the next
+  // instruction; for a fat call it is this address + 8 + the low 8 bits of
+  // the add instruction's encoding.
+  address return_address() const {
+    if (is_bl()) {
+      return addr_at(instruction_size);
+    }
+    assert(is_fat_call(), "must be");
+    const int offset = encoding() & 0xff;
+    return addr_at(offset + 8);
+  }
+
+  // Call target, as decoded by the bl or by the jump that follows the add.
+  address destination(int adj = 0) const {
+    if (is_bl()) {
+      return rawNativeBranch_at(instruction_address())->destination(adj);
+    }
+    assert(is_add_lr(), "must be"); // fat_call
+    RawNativeJump* jump = rawNativeJump_at(next_raw_instruction_address());
+    return jump->jump_destination(adj);
+  }
+
+  // Re-targets the bl, or the jump that follows the add of a fat call.
+  void set_destination(address dest) {
+    if (is_bl()) {
+      rawNativeBranch_at(instruction_address())->set_destination(dest);
+    } else {
+      assert(is_add_lr(), "must be"); // fat_call
+      RawNativeJump* jump = rawNativeJump_at(next_raw_instruction_address());
+      jump->set_jump_destination(dest);
+    }
+  }
+
+  void set_destination_mt_safe(address dest) {
+    assert(CodeCache::contains(dest), "external destination might be too far");
+    set_destination(dest);
+  }
+
+  void verify() {
+    assert(RawNativeInstruction::is_call() || (!VM_Version::supports_movw() && RawNativeInstruction::is_jump()), "must be");
+  }
+
+  void verify_alignment() {
+    // Nothing to do on ARM
+  }
+
+  static bool is_call_before(address return_address);
+};
+
+// Views 'address' as a RawNativeCall; asserts that it holds a call.
+inline RawNativeCall* rawNativeCall_at(address address) {
+  assert(rawNativeInstruction_at(address)->is_call(), "must be");
+  return (RawNativeCall*)address;
+}
+
+// NativeCall is defined further down; declare it so that the prototype below
+// does not depend on another header having introduced the name.
+class NativeCall;
+
+NativeCall* rawNativeCall_before(address return_address);
+
+// -------------------------------------------------------------------
+// NativeMovRegMem need not be extended with indirection support.
+// (field access patching is handled differently in that case)
+class NativeMovRegMem: public NativeInstruction {
+ public:
+
+  int offset() const;
+  void set_offset(int x);
+
+  void add_offset_in_bytes(int add_offset) {
+    set_offset(offset() + add_offset);
+  }
+
+};
+
+// Views 'address' as a NativeMovRegMem; asserts that kind() reports one of
+// the three load/store kinds.
+inline NativeMovRegMem* nativeMovRegMem_at(address address) {
+  NativeMovRegMem* instr = (NativeMovRegMem*)address;
+  assert(instr->kind() == NativeInstruction::instr_ldr_str   ||
+         instr->kind() == NativeInstruction::instr_ldrh_strh ||
+         instr->kind() == NativeInstruction::instr_fld_fst, "must be");
+  return instr;
+}
+
+// -------------------------------------------------------------------
+// NativeMovConstReg is primarily for loading oops and metadata
+class NativeMovConstReg: public NativeInstruction {
+ public:
+
+  intptr_t data() const;
+  void set_data(intptr_t x, address pc = 0);
+  // True unless the constant is materialized starting with a movw. This only
+  // reads the encoding, so it is usable on const objects.
+  bool is_pc_relative() const {
+    return !is_movw();
+  }
+  void set_pc_relative_offset(address addr, address pc);
+  address next_instruction_address() const {
+    // NOTE: CompiledStaticCall::set_to_interpreted() calls this but
+    // are restricted to single-instruction ldr. No need to jump over
+    // several instructions.
+    assert(is_ldr_literal(), "Should only use single-instructions load");
+    return next_raw_instruction_address();
+  }
+};
+
+// Views 'address' as a NativeMovConstReg; asserts that it starts with an ldr
+// literal, a PC-relative add/sub, or (where supported) a movw.
+inline NativeMovConstReg* nativeMovConstReg_at(address address) {
+  NativeInstruction* ni = nativeInstruction_at(address);
+  assert(ni->is_ldr_literal() || ni->is_pc_rel() ||
+         (ni->is_movw() && VM_Version::supports_movw()), "must be");
+  return (NativeMovConstReg*)address;
+}
+
+// -------------------------------------------------------------------
+// Front end classes, hiding experimental back-end extensions.
+
+// Extension to support indirections
+// Front-end name for RawNativeJump; adds no members of its own.
+class NativeJump: public RawNativeJump {
+ public:
+};
+
+// Views 'address' as a NativeJump; asserts that it holds a jump.
+inline NativeJump* nativeJump_at(address address) {
+  assert(nativeInstruction_at(address)->is_jump(), "must be");
+  NativeJump* const jump = (NativeJump*)address;
+  return jump;
+}
+
+// Front-end name for RawNativeCall.
+class NativeCall: public RawNativeCall {
+public:
+  // NativeCall::next_instruction_address() is used only to define the
+  // range where to look for the relocation information. We need not
+  // walk over composed instructions (as long as the relocation information
+  // is associated to the first instruction).
+  address next_instruction_address() const {
+    return next_raw_instruction_address();
+  }
+
+};
+
+// Views 'address' as a NativeCall. Besides calls, a jump is also accepted
+// when VM_Version::supports_movw() is false.
+inline NativeCall* nativeCall_at(address address) {
+  assert(nativeInstruction_at(address)->is_call() ||
+         (!VM_Version::supports_movw() && nativeInstruction_at(address)->is_jump()), "must be");
+  NativeCall* const call = (NativeCall*)address;
+  return call;
+}
+
+// Front-end wrapper around rawNativeCall_before().
+inline NativeCall* nativeCall_before(address return_address) {
+  NativeCall* const call = (NativeCall *) rawNativeCall_before(return_address);
+  return call;
+}
+
+#endif // CPU_ARM_VM_NATIVEINST_ARM_32_HPP
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/nativeInst_arm_64.cpp 2016-12-02 11:22:38.000266619 -0500
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_arm.inline.hpp"
+#include "code/codeCache.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_arm.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+// Stops the VM if this object is not located at a non-NULL address aligned
+// to instruction_size.
+void RawNativeInstruction::verify() {
+  // make sure code pattern is actually an instruction address
+  const address addr = instruction_address();
+  const bool misaligned = ((intptr_t)addr & (instruction_size - 1)) != 0;
+  if (addr == NULL || misaligned) {
+    fatal("not an instruction address");
+  }
+}
+
+// Patches the immediate offset of this load/store. Offsets that fit the
+// 12-bit scaled immediate are written in place; larger ones (below 2^24)
+// rewrite this instruction and the nop that must follow it.
+void NativeMovRegMem::set_offset(int x) {
+  const int scale = get_offset_scale();
+  assert((x & right_n_bits(scale)) == 0, "offset should be aligned");
+  guarantee((x >> 24) == 0, "encoding constraint");
+
+  if (Assembler::is_unsigned_imm_in_range(x, 12, scale)) {
+    // Reuse 'scale' instead of querying get_offset_scale() a second time.
+    set_unsigned_imm(x, 12, scale, 10);
+    return;
+  }
+
+  // If offset is too large to be placed into single ldr/str instruction, we replace
+  //   ldr/str  Rt, [Rn, #offset]
+  //   nop
+  // with
+  //   add  LR, Rn, #offset_hi
+  //   ldr/str  Rt, [LR, #offset_lo]
+
+  // Note: Rtemp cannot be used as a temporary register as it could be used
+  // for value being stored (see LIR_Assembler::reg2mem).
+  // Patchable NativeMovRegMem instructions are generated in LIR_Assembler::mem2reg and LIR_Assembler::reg2mem
+  // which do not use LR, so it is free. Also, it does not conflict with LR usages in c1_LIRGenerator_arm.cpp.
+  const int tmp = LR->encoding();
+  const int rn = (encoding() >> 5) & 0x1f;
+
+  NativeInstruction* next = nativeInstruction_at(next_raw_instruction_address());
+  assert(next->is_nop(), "must be");
+
+  // Order matters: the second slot is derived from the current encoding of
+  // this instruction, so it has to be written before this one is replaced.
+  const int offset_lo = Assembler::encode_unsigned_imm((x & 0xfff), 12, scale, 10);
+  const int offset_hi = Assembler::encode_unsigned_imm((x >> 12), 12, 0, 10);
+  next->set_encoding((encoding() & 0xffc0001f) | offset_lo | (tmp << 5));
+  this->set_encoding(0x91400000 | offset_hi | (rn << 5) | tmp);
+}
+
+// Returns the constant this instruction sequence loads: under COMPILER2 the
+// value assembled from a movz and up to three following movk instructions,
+// otherwise the value of the ldr literal.
+intptr_t NativeMovConstReg::_data() const {
+#ifdef COMPILER2
+  if (is_movz()) {
+    // narrow constant or ic call cached value
+    RawNativeInstruction* ni = next_raw();
+    assert(ni->is_movk(), "movz;movk expected");
+    const uint lo16 = (encoding() >> 5) & 0xffff;
+    intptr_t hi = 0;
+    for (int i = 0; ni->is_movk() && i < 3; ++i, ni = ni->next_raw()) {
+      const uint hi16 = (ni->encoding() >> 5) & 0xffff;
+      // The 2-bit field at bits 22..21 selects the 16-bit lane (shift / 16).
+      const int shift = ((ni->encoding() >> 21) & 0x3) << 4;
+      hi |= (intptr_t)hi16 << shift;
+    }
+    return lo16 | hi;
+  }
+#endif
+  return (intptr_t)(nativeLdrLiteral_at(instruction_address())->literal_value());
+}
+
+// Writes a new constant into the sequence starting at 'si'.
+//  - movz/movk sequence (COMPILER2 only): the 16-bit immediates are patched.
+//    With 'oop_addr' or 'metadata_addr' set, the 32-bit narrow encoding of
+//    *oop_addr / *metadata_addr is written; otherwise all 64 bits of 'x'.
+//  - ldr literal: with no relocation address the literal value is set to 'x';
+//    otherwise the ldr is pointed at the oop/metadata slot.
+static void raw_set_data(RawNativeInstruction* si, intptr_t x, oop* oop_addr, Metadata** metadata_addr) {
+#ifdef COMPILER2
+  if (si->is_movz()) {
+    // narrow constant or ic call cached value
+    uintptr_t nx = 0;
+    int val_size = 32;
+    if (oop_addr != NULL) {
+      narrowOop encoded_oop = oopDesc::encode_heap_oop(*oop_addr);
+      nx = encoded_oop;
+    } else if (metadata_addr != NULL) {
+      assert((*metadata_addr)->is_klass(), "expected Klass");
+      narrowKlass encoded_k = Klass::encode_klass((Klass *)*metadata_addr);
+      nx = encoded_k;
+    } else {
+      nx = x;
+      val_size = 64;
+    }
+    const int imm16 = 0xffff << 5;  // position of the 16-bit immediate field
+    const uint lo16 = nx & 0xffff;
+    si->set_encoding((si->encoding() & ~imm16) | (lo16 << 5));
+    RawNativeInstruction* ni = si->next_raw();
+    for (int shift = 16; shift < val_size; shift += 16, ni = ni->next_raw()) {
+      assert(ni->is_movk(), "movk expected");
+      assert((((ni->encoding() >> 21) & 0x3) << 4) == shift, "wrong shift");
+      const uint hi16 = (nx >> shift) & 0xffff;
+      ni->set_encoding((ni->encoding() & ~imm16) | (hi16 << 5));
+    }
+    return;
+  }
+#endif
+
+  assert(si->is_ldr_literal(), "should be");
+
+  if (oop_addr == NULL && metadata_addr == NULL) {
+    // A static ldr_literal without oop_relocation
+    nativeLdrLiteral_at(si->instruction_address())->set_literal_value((address)x);
+  } else {
+    // Oop is loaded from oops section
+    address addr = oop_addr != NULL ? (address)oop_addr : (address)metadata_addr;
+    // Keep the full pointer difference: narrowing it to int before the range
+    // check could make an out-of-range distance look encodable.
+    const intx offset = addr - si->instruction_address();
+
+    assert((((intptr_t)addr) & 0x7) == 0, "target address should be aligned");
+    assert((offset & 0x3) == 0, "offset should be aligned");
+
+    guarantee(Assembler::is_offset_in_range(offset, 19), "offset is not in range");
+    nativeLdrLiteral_at(si->instruction_address())->set_literal_address(si->instruction_address() + offset);
+  }
+}
+
+// Sets the constant loaded by this instruction to 'x'. If the instruction
+// lies in an nmethod and carries an oop or metadata relocation, the
+// relocation's slot is updated as well and passed on to raw_set_data().
+void NativeMovConstReg::set_data(intptr_t x) {
+  // Find and replace the oop corresponding to this instruction in oops section
+  oop* oop_addr = NULL;
+  Metadata** metadata_addr = NULL;
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  // find_blob() may not find anything; do not dereference a NULL blob.
+  nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_raw()->instruction_address());
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_addr = iter.oop_reloc()->oop_addr();
+        *oop_addr = cast_to_oop(x);
+        break;
+      } else if (iter.type() == relocInfo::metadata_type) {
+        metadata_addr = iter.metadata_reloc()->metadata_addr();
+        *metadata_addr = (Metadata*)x;
+        break;
+      }
+    }
+  }
+  raw_set_data(adjust(this), x, oop_addr, metadata_addr);
+}
+
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
+}
+
+// Overwrites the instruction at 'verified_entry' with the illegal
+// instruction. 'dest' must be the handle-wrong-method stub.
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "should be");
+
+  NativeInstruction* instr = nativeInstruction_at(verified_entry);
+  assert(instr->is_nop() || instr->encoding() == zombie_illegal_instruction, "required for MT-safe patching");
+  instr->set_encoding(zombie_illegal_instruction);
+}
+
+// Replaces the b instruction at 'instr_addr' with the nop found at
+// 'code_buffer'; other combinations are rejected by the asserts.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  assert (nativeInstruction_at(instr_addr)->is_b(), "MT-safe patching of arbitrary instructions is not allowed");
+  assert (nativeInstruction_at(code_buffer)->is_nop(), "MT-safe patching of arbitrary instructions is not allowed");
+  nativeInstruction_at(instr_addr)->set_encoding(*(int*)code_buffer);
+}
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+  // Insert at code_pos unconditional B instruction jumping to entry
+  intx offset = entry - code_pos;
+  assert (Assembler::is_offset_in_range(offset, 26), "offset is out of range");
+
+  NativeInstruction* instr = nativeInstruction_at(code_pos);
+  assert (instr->is_b() || instr->is_nop(), "MT-safe patching of arbitrary instructions is not allowed");
+
+  instr->set_encoding(0x5 << 26 | Assembler::encode_offset(offset, 26, 0));
+}
+
+// Returns the address of the call instruction whose return address is
+// 'return_address', or NULL if no call relocation in the preceding
+// 8 instructions matches.
+static address call_for(address return_address) {
+  CodeBlob* cb = CodeCache::find_blob(return_address);
+  // Treat "no blob found" like "not an nmethod" instead of dereferencing NULL.
+  nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL;
+  if (nm == NULL) {
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  // Look back 8 instructions (for LIR_Assembler::ic_call and MacroAssembler::patchable_call)
+  address begin = return_address - 8*NativeInstruction::instruction_size;
+  if (begin < nm->code_begin()) {
+    begin = nm->code_begin();
+  }
+  RelocIterator iter(nm, begin, return_address);
+  while (iter.next()) {
+    Relocation* reloc = iter.reloc();
+    if (!reloc->is_call()) {
+      continue;
+    }
+    address call = reloc->addr();
+    if (nativeInstruction_at(call)->is_call() &&
+        nativeCall_at(call)->return_address() == return_address) {
+      return call;
+    }
+  }
+
+  return NULL;
+}
+
+bool NativeCall::is_call_before(address return_address) {
+  return (call_for(return_address) != NULL);
+}
+
+// Returns the call that returns to 'return_address'; asserts that one exists.
+NativeCall* nativeCall_before(address return_address) {
+  // Look the call up once; asserting through is_call_before() would run the
+  // relocation scan a second time in debug builds.
+  address call = call_for(return_address);
+  assert(call != NULL, "must be");
+  return nativeCall_at(call);
+}
+
--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/nativeInst_arm_64.hpp 2016-12-02 11:22:43.952604165 -0500
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_ARM_VM_NATIVEINST_ARM_64_HPP
+#define CPU_ARM_VM_NATIVEINST_ARM_64_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "code/codeCache.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+// -------------------------------------------------------------------
+
+// Some experimental projects extend the ARM back-end by implementing
+// what the front-end usually assumes is a single native instruction
+// with a sequence of instructions.
+// +// The 'Raw' variants are the low level initial code (usually one +// instruction wide but some of them were already composed +// instructions). They should be used only by the back-end. +// +// The non-raw classes are the front-end entry point, hiding potential +// back-end extensions or the actual instructions size. +class NativeInstruction; + +class RawNativeInstruction VALUE_OBJ_CLASS_SPEC { + public: + + enum ARM_specific { + instruction_size = Assembler::InstructionSize, + instruction_size_in_bits = instruction_size * BitsPerByte, + }; + + // illegal instruction used by NativeJump::patch_verified_entry + static const int zombie_illegal_instruction = 0xd4000542; // hvc #42 + + address addr_at(int offset) const { return (address)this + offset; } + address instruction_address() const { return addr_at(0); } + address next_raw_instruction_address() const { return addr_at(instruction_size); } + + static RawNativeInstruction* at(address address) { + return (RawNativeInstruction*)address; + } + + RawNativeInstruction* next_raw() const { + return at(next_raw_instruction_address()); + } + + int encoding() const { + return *(int*)this; + } + + void set_encoding(int value) { + int old = encoding(); + if (old != value) { + *(int*)this = value; + ICache::invalidate_word((address)this); + } + } + + bool is_nop() const { return encoding() == (int)0xd503201f; } + bool is_b() const { return (encoding() & 0xfc000000) == 0x14000000; } // unconditional branch + bool is_b_cond() const { return (encoding() & 0xff000010) == 0x54000000; } // conditional branch + bool is_bl() const { return (encoding() & 0xfc000000) == 0x94000000; } + bool is_br() const { return (encoding() & 0xfffffc1f) == 0xd61f0000; } + bool is_blr() const { return (encoding() & 0xfffffc1f) == 0xd63f0000; } + bool is_ldr_literal() const { return (encoding() & 0xff000000) == 0x58000000; } + bool is_adr_aligned() const { return (encoding() & 0xff000000) == 0x10000000; } // adr Xn,