--- old/agent/make/Makefile 2021-01-25 19:30:05.908521834 +0000 +++ new/agent/make/Makefile 2021-01-25 19:30:05.788520573 +0000 @@ -58,11 +58,13 @@ sun.jvm.hotspot.debugger.dummy \ sun.jvm.hotspot.debugger.linux \ sun.jvm.hotspot.debugger.linux.amd64 \ +sun.jvm.hotspot.debugger.linux.aarch64 \ sun.jvm.hotspot.debugger.linux.x86 \ sun.jvm.hotspot.debugger.posix \ sun.jvm.hotspot.debugger.posix.elf \ sun.jvm.hotspot.debugger.proc \ sun.jvm.hotspot.debugger.proc.amd64 \ +sun.jvm.hotspot.debugger.proc.aarch64 \ sun.jvm.hotspot.debugger.proc.sparc \ sun.jvm.hotspot.debugger.proc.x86 \ sun.jvm.hotspot.debugger.remote \ @@ -88,11 +90,13 @@ sun.jvm.hotspot.prims \ sun.jvm.hotspot.runtime \ sun.jvm.hotspot.runtime.amd64 \ +sun.jvm.hotspot.runtime.aarch64 \ sun.jvm.hotspot.runtime.bsd \ sun.jvm.hotspot.runtime.bsd_amd64 \ sun.jvm.hotspot.runtime.bsd_x86 \ sun.jvm.hotspot.runtime.linux \ sun.jvm.hotspot.runtime.linux_amd64 \ +sun.jvm.hotspot.runtime.linux_aarch64 \ sun.jvm.hotspot.runtime.linux_sparc \ sun.jvm.hotspot.runtime.linux_x86 \ sun.jvm.hotspot.runtime.posix \ @@ -143,12 +147,14 @@ sun/jvm/hotspot/debugger/dummy/*.java \ sun/jvm/hotspot/debugger/linux/*.java \ sun/jvm/hotspot/debugger/linux/x86/*.java \ +sun/jvm/hotspot/debugger/linux/aarch64/*.java \ sun/jvm/hotspot/debugger/posix/*.java \ sun/jvm/hotspot/debugger/posix/elf/*.java \ sun/jvm/hotspot/debugger/proc/*.java \ sun/jvm/hotspot/debugger/proc/amd64/*.java \ sun/jvm/hotspot/debugger/proc/sparc/*.java \ sun/jvm/hotspot/debugger/proc/x86/*.java \ +sun/jvm/hotspot/debugger/proc/aarch64/*.java \ sun/jvm/hotspot/debugger/remote/*.java \ sun/jvm/hotspot/debugger/remote/amd64/*.java \ sun/jvm/hotspot/debugger/remote/sparc/*.java \ @@ -169,11 +175,13 @@ sun/jvm/hotspot/prims/*.java \ sun/jvm/hotspot/runtime/*.java \ sun/jvm/hotspot/runtime/amd64/*.java \ +sun/jvm/hotspot/runtime/aarch64/*.java \ sun/jvm/hotspot/runtime/bsd/*.java \ sun/jvm/hotspot/runtime/bsd_amd64/*.java \ sun/jvm/hotspot/runtime/bsd_x86/*.java \ sun/jvm/hotspot/runtime/linux/*.java \ sun/jvm/hotspot/runtime/linux_amd64/*.java \ +sun/jvm/hotspot/runtime/linux_aarch64/*.java \ sun/jvm/hotspot/runtime/linux_sparc/*.java \ sun/jvm/hotspot/runtime/linux_x86/*.java \ sun/jvm/hotspot/runtime/posix/*.java \ --- old/agent/src/os/linux/LinuxDebuggerLocal.c 2021-01-25 19:30:06.375526743 +0000 +++ new/agent/src/os/linux/LinuxDebuggerLocal.c 2021-01-25 19:30:06.258525513 +0000 @@ -49,6 +49,10 @@ #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" #endif +#ifdef aarch64 +#include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -333,7 +337,7 @@ return (err == PS_OK)? 
array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -355,6 +359,9 @@ #ifdef amd64 #define NPRGREG sun_jvm_hotspot_debugger_amd64_AMD64ThreadContext_NPRGREG #endif +#ifdef aarch64 +#define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG +#endif #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif @@ -450,6 +457,19 @@ regs[REG_INDEX(R_O7)] = gregs.u_regs[14]; #endif /* sparc */ +#if defined(aarch64) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_##reg + + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(PC)] = gregs.pc; + } +#endif /* aarch64 */ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); return array; --- old/agent/src/os/linux/Makefile 2021-01-25 19:30:06.831531535 +0000 +++ new/agent/src/os/linux/Makefile 2021-01-25 19:30:06.712530285 +0000 @@ -53,14 +53,15 @@ $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.debugger.x86.X86ThreadContext \ sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ - sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ + sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(GCC) $(CFLAGS) $< -o $@ $(ARCH)/sadis.o: ../../share/native/sadis.c $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.asm.Disassembler $(GCC) $(CFLAGS) $< -o $@ - + $(ARCH)/%.o: %.c $(GCC) $(CFLAGS) $< -o $@ --- old/agent/src/os/linux/libproc.h 2021-01-25 19:30:07.263536076 +0000 +++ new/agent/src/os/linux/libproc.h 2021-01-25 19:30:07.147534857 +0000 @@ -36,6 +36,10 @@ #include +#if defined(aarch64) +#include "asm/ptrace.h" +#endif + /************************************************************************************ 0. This is very minimal subset of Solaris libproc just enough for current application. @@ -72,6 +76,9 @@ #include #define user_regs_struct pt_regs #endif +#if defined(aarch64) +#define user_regs_struct user_pt_regs +#endif // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms --- old/agent/src/share/classes/sun/jvm/hotspot/HSDB.java 2021-01-25 19:30:07.743541121 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/HSDB.java 2021-01-25 19:30:07.611539734 +0000 @@ -992,7 +992,8 @@ curFrame.getFP(), anno)); } else { - if (VM.getVM().getCPU().equals("x86") || VM.getVM().getCPU().equals("amd64")) { + if (VM.getVM().getCPU().equals("x86") || VM.getVM().getCPU().equals("amd64") || + VM.getVM().getCPU().equals("aarch64")) { // For C2, which has null frame pointers on x86/amd64 CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); Address sp = curFrame.getSP(); --- old/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2021-01-25 19:30:08.232546261 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java 2021-01-25 19:30:08.103544905 +0000 @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +34,8 @@ import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.utilities.*; @@ -96,6 +99,13 @@ Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); if (pc == null) return null; return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); + } else if (cpu.equals("aarch64")) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); --- old/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2021-01-25 19:30:08.678550948 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java 2021-01-25 19:30:08.559549697 +0000 @@ -31,9 +31,11 @@ import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.proc.amd64.*; +import sun.jvm.hotspot.debugger.proc.aarch64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.x86.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.utilities.*; @@ -86,6 +88,10 @@ threadFactory = new ProcAMD64ThreadFactory(this); pcRegIndex = AMD64ThreadContext.RIP; fpRegIndex = AMD64ThreadContext.RBP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; } else { try { Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." 
+ --- old/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java 2021-01-25 19:30:09.178556204 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java 2021-01-25 19:30:09.058554942 +0000 @@ -35,6 +35,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; import sun.jvm.hotspot.utilities.*; @@ -87,6 +88,8 @@ access = new LinuxAMD64JavaThreadPDAccess(); } else if (cpu.equals("sparc")) { access = new LinuxSPARCJavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) --- old/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2021-01-25 19:30:09.634560996 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2021-01-25 19:30:09.517559767 +0000 @@ -63,6 +63,8 @@ return "sparc"; } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64")) { return cpu; + } else if (cpu.equals("aarch64")) { + return cpu; } else { try { Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); --- old/make/defs.make 2021-01-25 19:30:10.121566115 +0000 +++ new/make/defs.make 2021-01-25 19:30:10.000564843 +0000 @@ -285,7 +285,7 @@ # Use uname output for SRCARCH, but deal with platform differences. If ARCH # is not explicitly listed below, it is treated as x86. - SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero,$(ARCH))) + SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64,$(ARCH))) ARCH/ = x86 ARCH/sparc = sparc ARCH/sparc64= sparc @@ -296,6 +296,7 @@ ARCH/ppc64le= ppc ARCH/ppc = ppc ARCH/zero = zero + ARCH/aarch64 = aarch64 # BUILDARCH is usually the same as SRCARCH, except for sparcv9 BUILDARCH ?= $(SRCARCH) @@ -326,13 +327,14 @@ LIBARCH ?= $(LIBARCH/$(BUILDARCH)) LIBARCH/i486 = i386 LIBARCH/amd64 = amd64 + LIBARCH/aarch64 = aarch64 LIBARCH/sparc = sparc LIBARCH/sparcv9 = sparcv9 LIBARCH/ia64 = ia64 LIBARCH/ppc64 = ppc64 LIBARCH/zero = $(ZERO_LIBARCH) - LP64_ARCH += sparcv9 amd64 ia64 ppc64 zero + LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero endif # Required make macro settings for all platforms --- old/make/linux/makefiles/buildtree.make 2021-01-25 19:30:10.593571076 +0000 +++ new/make/linux/makefiles/buildtree.make 2021-01-25 19:30:10.474569825 +0000 @@ -201,6 +201,7 @@ DATA_MODE/sparcv9 = 64 DATA_MODE/amd64 = 64 DATA_MODE/ppc64 = 64 +DATA_MODE/aarch64 = 64 DATA_MODE = $(DATA_MODE/$(BUILDARCH)) --- old/make/linux/makefiles/defs.make 2021-01-25 19:30:11.044575816 +0000 +++ new/make/linux/makefiles/defs.make 2021-01-25 19:30:10.921574523 +0000 @@ -114,6 +114,15 @@ HS_ARCH = ppc endif +# AARCH64 +ifeq ($(ARCH), aarch64) + ARCH_DATA_MODEL = 64 + MAKE_ARGS += LP64=1 + PLATFORM = linux-aarch64 + VM_PLATFORM = linux_aarch64 + HS_ARCH = aarch64 +endif + # On 32 bit linux we build server and client, on 64 bit just server. 
ifeq ($(JVM_VARIANTS),) ifeq ($(ARCH_DATA_MODEL), 32) @@ -300,6 +309,8 @@ $(EXPORT_LIB_DIR)/sa-jdi.jar ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifneq ($(STRIP_POLICY),no_strip) ifeq ($(ZIP_DEBUGINFO_FILES),1) --- old/make/linux/makefiles/gcc.make 2021-01-25 19:30:11.482580420 +0000 +++ new/make/linux/makefiles/gcc.make 2021-01-25 19:30:11.364579180 +0000 @@ -173,6 +173,7 @@ ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) ARCHFLAG/i486 = -m32 -march=i586 ARCHFLAG/amd64 = -m64 $(STACK_ALIGNMENT_OPT) +ARCHFLAG/aarch64 = ARCHFLAG/ia64 = ARCHFLAG/sparc = -m32 -mcpu=v9 ARCHFLAG/sparcv9 = -m64 -mcpu=v9 --- old/make/linux/makefiles/sa.make 2021-01-25 19:30:11.921585034 +0000 +++ new/make/linux/makefiles/sa.make 2021-01-25 19:30:11.804583804 +0000 @@ -108,6 +108,7 @@ $(QUIETLY) $(REMOTE) $(RUN.JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler --- old/make/sa.files 2021-01-25 19:30:12.367589722 +0000 +++ new/make/sa.files 2021-01-25 19:30:12.251588502 +0000 @@ -43,6 +43,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/compiler/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/x86/*.java \ @@ -52,17 +53,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/x86/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/win32/coff/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/windbg/*.java \ @@ -83,11 +87,13 @@ 
$(AGENT_SRC_DIR)/sun/jvm/hotspot/prims/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \ --- old/src/os/linux/vm/os_linux.cpp 2021-01-25 19:30:12.847594767 +0000 +++ new/src/os/linux/vm/os_linux.cpp 2021-01-25 19:30:12.717593400 +0000 @@ -1418,8 +1418,8 @@ #ifndef SYS_clock_getres -#if defined(IA32) || defined(AMD64) -#define SYS_clock_getres IA32_ONLY(266) AMD64_ONLY(229) +#if defined(IA32) || defined(AMD64) || defined(AARCH64) +#define SYS_clock_getres IA32_ONLY(266) AMD64_ONLY(229) AARCH64_ONLY(114) #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) #else #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time" @@ -2010,7 +2010,7 @@ static Elf32_Half running_arch_code=EM_AARCH64; #else #error Method os::dll_load requires that one of following is defined:\ - IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K + IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 #endif // Identify compatability class for VM's architecture and library's architecture @@ -2949,12 +2949,7 @@ unsigned int cpu = 0; int retval = -1; -#if defined(IA32) -# ifndef SYS_getcpu -# define SYS_getcpu 318 -# endif - retval = syscall(SYS_getcpu, &cpu, NULL, NULL); -#elif defined(AMD64) +#if defined(AMD64) // Unfortunately we have to bring all these macros here from vsyscall.h // to be able to compile on old linuxes. # define __NR_vgetcpu 2 @@ -2964,6 +2959,11 @@ typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache); vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu); retval = vgetcpu(&cpu, NULL, NULL); +#elif defined(IA32) || defined(AARCH64) +# ifndef SYS_getcpu +# define SYS_getcpu AARCH64_ONLY(168) IA32_ONLY(318) +# endif + retval = syscall(SYS_getcpu, &cpu, NULL, NULL); #endif return (retval == -1) ? 
retval : cpu; @@ -3517,7 +3517,7 @@ #ifndef ZERO large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM_ONLY(2 * M) PPC_ONLY(4 * M); + ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); --- old/src/os/linux/vm/os_perf_linux.cpp 2021-01-25 19:30:13.385600421 +0000 +++ new/src/os/linux/vm/os_perf_linux.cpp 2021-01-25 19:30:13.254599044 +0000 @@ -32,6 +32,9 @@ #ifdef TARGET_ARCH_aarch32 # include "vm_version_ext_aarch32.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vm_version_ext_aarch64.hpp" +#endif #ifdef TARGET_ARCH_x86 # include "vm_version_ext_x86.hpp" #endif --- old/src/share/tools/hsdis/hsdis.c 2021-01-25 19:30:13.881605635 +0000 +++ new/src/share/tools/hsdis/hsdis.c 2021-01-25 19:30:13.757604331 +0000 @@ -492,6 +492,9 @@ #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le) res = "powerpc:common64"; #endif +#ifdef LIBARCH_aarch64 + res = "aarch64"; +#endif if (res == NULL) res = "architecture not set in Makefile!"; return res; --- old/src/share/vm/adlc/formssel.cpp 2021-01-25 19:30:14.367610743 +0000 +++ new/src/share/vm/adlc/formssel.cpp 2021-01-25 19:30:14.232609324 +0000 @@ -23,6 +23,7 @@ */ // FORMS.CPP - Definitions for ADL Parser Forms Classes +#include "utilities/macros.hpp" #include "adlc.hpp" //==============================Instructions=================================== @@ -1242,7 +1243,8 @@ !is_short_branch() && // Don't match another short branch variant reduce_result() != NULL && strcmp(reduce_result(), short_branch->reduce_result()) == 0 && - _matrule->equivalent(AD.globalNames(), short_branch->_matrule)) { + _matrule->equivalent(AD.globalNames(), short_branch->_matrule) + AARCH64_ONLY(&& equivalent_predicates(this, short_branch))) { // The instructions are equivalent. // Now verify that both instructions have the same parameters and --- old/src/share/vm/adlc/main.cpp 2021-01-25 19:30:14.855615872 +0000 +++ new/src/share/vm/adlc/main.cpp 2021-01-25 19:30:14.730614558 +0000 @@ -234,6 +234,11 @@ AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); #endif +#ifdef TARGET_ARCH_aarch64 + AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp"); + AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_aarch64.inline.hpp"); +#endif #ifdef TARGET_ARCH_sparc AD.addInclude(AD._CPP_file, "nativeInst_sparc.hpp"); AD.addInclude(AD._CPP_file, "vmreg_sparc.inline.hpp"); --- old/src/share/vm/asm/assembler.hpp 2021-01-25 19:30:15.313620686 +0000 +++ new/src/share/vm/asm/assembler.hpp 2021-01-25 19:30:15.193619424 +0000 @@ -53,6 +53,10 @@ # include "register_ppc.hpp" # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "register_aarch64.hpp" +# include "vm_version_aarch64.hpp" +#endif // This file contains platform-independent assembler declarations. 
@@ -448,6 +452,9 @@ #ifdef TARGET_ARCH_x86 # include "assembler_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "assembler_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "assembler_sparc.hpp" #endif --- old/src/share/vm/asm/assembler.inline.hpp 2021-01-25 19:30:15.776625552 +0000 +++ new/src/share/vm/asm/assembler.inline.hpp 2021-01-25 19:30:15.659624322 +0000 @@ -42,5 +42,8 @@ #ifdef TARGET_ARCH_ppc # include "assembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "assembler_aarch64.inline.hpp" +#endif #endif // SHARE_VM_ASM_ASSEMBLER_INLINE_HPP --- old/src/share/vm/asm/codeBuffer.hpp 2021-01-25 19:30:16.261630650 +0000 +++ new/src/share/vm/asm/codeBuffer.hpp 2021-01-25 19:30:16.121629178 +0000 @@ -620,6 +620,9 @@ #ifdef TARGET_ARCH_x86 # include "codeBuffer_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "codeBuffer_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "codeBuffer_sparc.hpp" #endif --- old/src/share/vm/asm/macroAssembler.hpp 2021-01-25 19:30:16.713635400 +0000 +++ new/src/share/vm/asm/macroAssembler.hpp 2021-01-25 19:30:16.593634139 +0000 @@ -42,5 +42,8 @@ #ifdef TARGET_ARCH_ppc # include "macroAssembler_ppc.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "macroAssembler_aarch64.hpp" +#endif #endif // SHARE_VM_ASM_MACROASSEMBLER_HPP --- old/src/share/vm/asm/macroAssembler.inline.hpp 2021-01-25 19:30:17.212640645 +0000 +++ new/src/share/vm/asm/macroAssembler.inline.hpp 2021-01-25 19:30:17.079639247 +0000 @@ -42,5 +42,8 @@ #ifdef TARGET_ARCH_ppc # include "macroAssembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "macroAssembler_aarch64.inline.hpp" +#endif #endif // SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP --- old/src/share/vm/asm/register.hpp 2021-01-25 19:30:17.703645806 +0000 +++ new/src/share/vm/asm/register.hpp 2021-01-25 19:30:17.567644376 +0000 @@ -108,6 +108,9 @@ #ifdef TARGET_ARCH_ppc # include "register_ppc.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "register_aarch64.hpp" +#endif // Debugging support --- old/src/share/vm/c1/c1_Canonicalizer.cpp 2021-01-25 19:30:18.162650630 +0000 +++ new/src/share/vm/c1/c1_Canonicalizer.cpp 2021-01-25 19:30:18.031649253 +0000 @@ -938,6 +938,13 @@ *log2_scale = 0; } +// AARCH64 cannot handle shifts which are not either 0, or log2 of the type size +#ifdef AARCH64 + if (*log2_scale != 0 && + (1 << *log2_scale) != type2aelembytes(x->basic_type(), true)) + return false; +#endif + // If the value is pinned then it will be always be computed so // there's no profit to reshaping the expression. 
return !root->is_pinned(); --- old/src/share/vm/c1/c1_Defs.hpp 2021-01-25 19:30:18.645655707 +0000 +++ new/src/share/vm/c1/c1_Defs.hpp 2021-01-25 19:30:18.524654435 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "register_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "register_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "register_sparc.hpp" #endif @@ -53,6 +56,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_Defs_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_Defs_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_Defs_sparc.hpp" #endif --- old/src/share/vm/c1/c1_FpuStackSim.hpp 2021-01-25 19:30:19.132660825 +0000 +++ new/src/share/vm/c1/c1_FpuStackSim.hpp 2021-01-25 19:30:19.002659459 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_FpuStackSim_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_FpuStackSim_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_FpuStackSim_sparc.hpp" #endif --- old/src/share/vm/c1/c1_FrameMap.cpp 2021-01-25 19:30:19.586665597 +0000 +++ new/src/share/vm/c1/c1_FrameMap.cpp 2021-01-25 19:30:19.473664409 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmreg_sparc.inline.hpp" #endif --- old/src/share/vm/c1/c1_FrameMap.hpp 2021-01-25 19:30:20.083670821 +0000 +++ new/src/share/vm/c1/c1_FrameMap.hpp 2021-01-25 19:30:19.940669318 +0000 @@ -85,6 +85,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_FrameMap_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_FrameMap_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_FrameMap_sparc.hpp" #endif --- old/src/share/vm/c1/c1_LIR.cpp 2021-01-25 19:30:20.532675540 +0000 +++ new/src/share/vm/c1/c1_LIR.cpp 2021-01-25 19:30:20.413674289 +0000 @@ -67,7 +67,7 @@ #endif -#ifdef ARM +#if defined(ARM) || defined(AARCH64) FloatRegister LIR_OprDesc::as_float_reg() const { return as_FloatRegister(fpu_regnr()); @@ -149,7 +149,11 @@ #endif #ifdef _LP64 assert(base()->is_cpu_register(), "wrong base operand"); +#ifndef AARCH64 assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); +#else + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); +#endif assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); #else @@ -556,7 +560,7 @@ assert(opConvert->_info == NULL, "must be"); if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); if (opConvert->_result->is_valid()) do_output(opConvert->_result); -#ifdef PPC +#if defined(PPC) || defined(AARCH64) if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); #endif @@ -1574,7 +1578,12 @@ } else if (is_double_cpu()) { out->print("%s", as_register_hi()->name()); out->print("%s", as_register_lo()->name()); -#if defined(X86) +#if defined(AARCH64) + } else if (is_single_fpu()) { + out->print("fpu%d", fpu_regnr()); + } else if (is_double_fpu()) { + out->print("fpu%d", fpu_regnrLo()); +#elif defined(X86) } else if (is_single_xmm()) { out->print("%s", as_xmm_float_reg()->name()); } else if (is_double_xmm()) { @@ -1971,7 +1980,7 @@ print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); -#ifdef PPC +#if defined(PPC) || defined(AARCH64) if(tmp1()->is_valid()) { tmp1()->print(out); out->print(" "); 
tmp2()->print(out); out->print(" "); --- old/src/share/vm/c1/c1_LIR.hpp 2021-01-25 19:30:21.011680575 +0000 +++ new/src/share/vm/c1/c1_LIR.hpp 2021-01-25 19:30:20.883679229 +0000 @@ -446,13 +446,13 @@ return as_register(); } -#ifdef X86 +#if defined(X86) XMMRegister as_xmm_float_reg() const; XMMRegister as_xmm_double_reg() const; // for compatibility with RInfo int fpu () const { return lo_reg_half(); } -#endif // X86 -#if defined(SPARC) || defined(ARM) || defined(PPC) +#endif +#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) FloatRegister as_float_reg () const; FloatRegister as_double_reg () const; #endif @@ -542,7 +542,7 @@ , _type(type) , _disp(0) { verify(); } -#if defined(X86) || defined(ARM) +#if defined(X86) || defined(ARM) || defined(AARCH64) LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type): _base(base) , _index(index) @@ -625,7 +625,7 @@ LIR_OprDesc::double_type | LIR_OprDesc::fpu_register | LIR_OprDesc::double_size); } -#elif defined(X86) +#elif defined(X86) || defined(AARCH64) static LIR_Opr double_fpu(int reg) { return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | (reg << LIR_OprDesc::reg2_shift) | LIR_OprDesc::double_type | @@ -1474,7 +1474,7 @@ private: Bytecodes::Code _bytecode; ConversionStub* _stub; -#ifdef PPC +#if defined(PPC) || defined(AARCH64) LIR_Opr _tmp1; LIR_Opr _tmp2; #endif @@ -1489,7 +1489,7 @@ #endif , _bytecode(code) {} -#ifdef PPC +#if defined(PPC) || defined(AARCH64) LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub ,LIR_Opr tmp1, LIR_Opr tmp2) : LIR_Op1(lir_convert, opr, result) @@ -1501,7 +1501,7 @@ Bytecodes::Code bytecode() const { return _bytecode; } ConversionStub* stub() const { return _stub; } -#ifdef PPC +#if defined(PPC) || defined(AARCH64) LIR_Opr tmp1() const { return _tmp1; } LIR_Opr tmp2() const { return _tmp2; } #endif @@ -2144,7 +2144,14 @@ #ifdef PPC void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); } #endif +#if defined(AARCH64) + void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, + ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) { + append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr())); + } +#else void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } +#endif void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } --- old/src/share/vm/c1/c1_LIRAssembler.cpp 2021-01-25 19:30:21.482685525 +0000 +++ new/src/share/vm/c1/c1_LIRAssembler.cpp 2021-01-25 19:30:21.356684201 +0000 @@ -34,6 +34,10 @@ # include "nativeInst_x86.hpp" # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" # include "vmreg_sparc.inline.hpp" @@ -124,6 +128,9 @@ , _pending_non_safepoint_offset(0) { _slow_case_stubs = new CodeStubList(); +#ifdef TARGET_ARCH_aarch64 + init(); // Target-dependent initialization +#endif } @@ -163,8 +170,10 @@ #endif s->emit_code(this); #ifdef ASSERT +#ifndef AARCH64 s->assert_no_unbound_labels(); #endif +#endif } } --- 
old/src/share/vm/c1/c1_LIRAssembler.hpp 2021-01-25 19:30:21.943690371 +0000 +++ new/src/share/vm/c1/c1_LIRAssembler.hpp 2021-01-25 19:30:21.824689120 +0000 @@ -265,6 +265,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_LIRAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_LIRAssembler_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_LIRAssembler_sparc.hpp" #endif --- old/src/share/vm/c1/c1_LIRGenerator.cpp 2021-01-25 19:30:22.406695237 +0000 +++ new/src/share/vm/c1/c1_LIRGenerator.cpp 2021-01-25 19:30:22.283693944 +0000 @@ -1607,6 +1607,11 @@ } else { __ unsigned_shift_right(addr, CardTableModRefBS::card_shift, tmp); } + + if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { + __ membar_storestore(); + } + if (can_inline_as_constant(card_table_base)) { __ move(LIR_OprFact::intConst(0), new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE)); @@ -2105,7 +2110,7 @@ assert(index_op->type() == T_INT, "only int constants supported"); addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); } else { -#ifdef X86 +#if defined(X86) || defined(AARCH64) addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); #elif defined(GENERATE_ADDRESS_IS_PREFERRED) addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); --- old/src/share/vm/c1/c1_LinearScan.cpp 2021-01-25 19:30:22.909700524 +0000 +++ new/src/share/vm/c1/c1_LinearScan.cpp 2021-01-25 19:30:22.782699189 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmreg_sparc.inline.hpp" #endif @@ -1093,7 +1096,7 @@ } -#ifdef X86 +#if defined(X86) if (op->code() == lir_cmove) { // conditional moves can handle stack operands assert(op->result_opr()->is_register(), "result must always be in a register"); @@ -2195,7 +2198,7 @@ LIR_Opr res = operand_for_interval(interval); -#ifdef X86 +#if defined(X86) || defined(AARCH64) // new semantic for is_last_use: not only set on definite end of interval, // but also before hole // This may still miss some cases (e.g. for dead values), but it is not necessary that the @@ -4538,7 +4541,9 @@ opr = LIR_OprFact::single_xmm(assigned_reg() - pd_first_xmm_reg); #endif } else { +#if !defined(AARCH64) ShouldNotReachHere(); +#endif } } else { type_name = type2name(type()); @@ -5612,7 +5617,7 @@ } bool LinearScanWalker::no_allocation_possible(Interval* cur) { -#ifdef X86 +#if defined(X86) // fast calculation of intervals that can never get a register because the // the next instruction is a call that blocks all registers // Note: this does not work if callee-saved registers are available (e.g. 
on Sparc) --- old/src/share/vm/c1/c1_LinearScan.hpp 2021-01-25 19:30:23.445706158 +0000 +++ new/src/share/vm/c1/c1_LinearScan.hpp 2021-01-25 19:30:23.320704844 +0000 @@ -976,6 +976,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_LinearScan_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_LinearScan_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_LinearScan_sparc.hpp" #endif --- old/src/share/vm/c1/c1_MacroAssembler.hpp 2021-01-25 19:30:23.969711665 +0000 +++ new/src/share/vm/c1/c1_MacroAssembler.hpp 2021-01-25 19:30:23.832710225 +0000 @@ -50,6 +50,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_MacroAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_MacroAssembler_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_MacroAssembler_sparc.hpp" #endif --- old/src/share/vm/c1/c1_Runtime1.cpp 2021-01-25 19:30:24.432716531 +0000 +++ new/src/share/vm/c1/c1_Runtime1.cpp 2021-01-25 19:30:24.302715165 +0000 @@ -801,6 +801,11 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id )) NOT_PRODUCT(_patch_code_slowcase_cnt++;) +#ifdef AARCH64 + // AArch64 does not patch C1-generated code. + ShouldNotReachHere(); +#endif + ResourceMark rm(thread); RegisterMap reg_map(thread, false); frame runtime_frame = thread->last_frame(); @@ -947,7 +952,6 @@ } // Now copy code back - { MutexLockerEx ml_patch (Patching_lock, Mutex::_no_safepoint_check_flag); // @@ -1190,6 +1194,7 @@ // completes we can check for deoptimization. This simplifies the // assembly code in the cpu directories. // +#ifndef TARGET_ARCH_aarch64 int Runtime1::move_klass_patching(JavaThread* thread) { // // NOTE: we are still in Java @@ -1274,7 +1279,7 @@ return caller_is_deopted(); JRT_END - +#endif JRT_LEAF(void, Runtime1::trace_block_entry(jint block_id)) // for now we just print out the block id --- old/src/share/vm/c1/c1_Runtime1.hpp 2021-01-25 19:30:24.898721429 +0000 +++ new/src/share/vm/c1/c1_Runtime1.hpp 2021-01-25 19:30:24.777720157 +0000 @@ -164,6 +164,9 @@ static int move_appendix_patching(JavaThread* thread); static void patch_code(JavaThread* thread, StubID stub_id); +#ifdef TARGET_ARCH_aarch64 + static void patch_code_aarch64(JavaThread* thread, StubID stub_id); +#endif public: // initialization --- old/src/share/vm/c1/c1_globals.hpp 2021-01-25 19:30:25.389726590 +0000 +++ new/src/share/vm/c1/c1_globals.hpp 2021-01-25 19:30:25.248725108 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_globals_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_globals_sparc.hpp" #endif --- old/src/share/vm/classfile/bytecodeAssembler.cpp 2021-01-25 19:30:25.843731361 +0000 +++ new/src/share/vm/classfile/bytecodeAssembler.cpp 2021-01-25 19:30:25.727730142 +0000 @@ -44,6 +44,9 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif u2 BytecodeConstantPool::find_or_add(BytecodeCPEntry const& bcpe) { u2 index; --- old/src/share/vm/classfile/classFileStream.hpp 2021-01-25 19:30:26.344736627 +0000 +++ new/src/share/vm/classfile/classFileStream.hpp 2021-01-25 19:30:26.204735155 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/classfile/stackMapTable.hpp 2021-01-25 19:30:26.787741284 +0000 +++ new/src/share/vm/classfile/stackMapTable.hpp 2021-01-25 
19:30:26.672740075 +0000 @@ -34,6 +34,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/classfile/verifier.cpp 2021-01-25 19:30:27.293746602 +0000 +++ new/src/share/vm/classfile/verifier.cpp 2021-01-25 19:30:27.168745288 +0000 @@ -48,6 +48,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/code/codeBlob.cpp 2021-01-25 19:30:27.808752015 +0000 +++ new/src/share/vm/code/codeBlob.cpp 2021-01-25 19:30:27.677750638 +0000 @@ -42,6 +42,9 @@ #ifdef TARGET_ARCH_x86 # include "nativeInst_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" #endif --- old/src/share/vm/code/compiledIC.hpp 2021-01-25 19:30:28.289757070 +0000 +++ new/src/share/vm/code/compiledIC.hpp 2021-01-25 19:30:28.167755788 +0000 @@ -30,6 +30,9 @@ #ifdef TARGET_ARCH_x86 # include "nativeInst_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" #endif @@ -320,7 +323,11 @@ friend CompiledStaticCall* compiledStaticCall_at(Relocation* call_site); // Code +#if defined(AARCH64) && !defined(ZERO) + static address emit_to_interp_stub(CodeBuffer &cbuf, address mark); +#else static address emit_to_interp_stub(CodeBuffer &cbuf); +#endif static int to_interp_stub_size(); static int reloc_to_interp_stub(); --- old/src/share/vm/code/relocInfo.hpp 2021-01-25 19:30:28.760762021 +0000 +++ new/src/share/vm/code/relocInfo.hpp 2021-01-25 19:30:28.619760539 +0000 @@ -418,6 +418,9 @@ #ifdef TARGET_ARCH_x86 # include "relocInfo_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "relocInfo_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "relocInfo_sparc.hpp" #endif --- old/src/share/vm/code/vmreg.hpp 2021-01-25 19:30:29.258767255 +0000 +++ new/src/share/vm/code/vmreg.hpp 2021-01-25 19:30:29.123765836 +0000 @@ -38,6 +38,8 @@ # include "adfiles/adGlobals_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/adGlobals_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/adGlobals_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/adGlobals_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero @@ -156,6 +158,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmreg_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmreg_sparc.hpp" #endif --- old/src/share/vm/compiler/disassembler.cpp 2021-01-25 19:30:29.721772121 +0000 +++ new/src/share/vm/compiler/disassembler.cpp 2021-01-25 19:30:29.600770850 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "depChecker_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "depChecker_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "depChecker_sparc.hpp" #endif --- old/src/share/vm/compiler/disassembler.hpp 2021-01-25 19:30:30.192777072 +0000 +++ new/src/share/vm/compiler/disassembler.hpp 2021-01-25 19:30:30.073775821 +0000 @@ -81,6 +81,9 @@ #ifdef TARGET_ARCH_x86 # include "disassembler_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "disassembler_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "disassembler_sparc.hpp" #endif --- 
old/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp 2021-01-25 19:30:30.671782106 +0000 +++ new/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp 2021-01-25 19:30:30.552780856 +0000 @@ -205,7 +205,9 @@ } void G1CodeRootSet::allocate_small_table() { - _table = new CodeRootSetTable(SmallSize); + CodeRootSetTable* temp = new CodeRootSetTable(SmallSize); + + OrderAccess::release_store_ptr(&_table, temp); } void CodeRootSetTable::purge_list_append(CodeRootSetTable* table) { --- old/src/share/vm/interpreter/abstractInterpreter.hpp 2021-01-25 19:30:31.128786910 +0000 +++ new/src/share/vm/interpreter/abstractInterpreter.hpp 2021-01-25 19:30:31.002785585 +0000 @@ -34,6 +34,8 @@ # include INTERP_MASM_MD_HPP #elif defined TARGET_ARCH_x86 # include "interp_masm_x86.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "interp_masm_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "interp_masm_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/interpreter/bytecode.hpp 2021-01-25 19:30:31.606791934 +0000 +++ new/src/share/vm/interpreter/bytecode.hpp 2021-01-25 19:30:31.472790525 +0000 @@ -31,6 +31,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/interpreter/bytecodeInterpreter.hpp 2021-01-25 19:30:32.082796937 +0000 +++ new/src/share/vm/interpreter/bytecodeInterpreter.hpp 2021-01-25 19:30:31.953795581 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif @@ -589,6 +592,9 @@ #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytecodeInterpreter_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytecodeInterpreter_sparc.hpp" #endif --- old/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp 2021-01-25 19:30:32.561801971 +0000 +++ new/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp 2021-01-25 19:30:32.436800657 +0000 @@ -46,6 +46,9 @@ #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytecodeInterpreter_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytecodeInterpreter_sparc.inline.hpp" #endif --- old/src/share/vm/interpreter/bytecodeStream.hpp 2021-01-25 19:30:33.033806932 +0000 +++ new/src/share/vm/interpreter/bytecodeStream.hpp 2021-01-25 19:30:32.912805660 +0000 @@ -32,6 +32,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/interpreter/bytecodes.cpp 2021-01-25 19:30:33.524812093 +0000 +++ new/src/share/vm/interpreter/bytecodes.cpp 2021-01-25 19:30:33.407810863 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/interpreter/bytecodes.hpp 2021-01-25 19:30:34.010817201 +0000 +++ new/src/share/vm/interpreter/bytecodes.hpp 2021-01-25 19:30:33.889815929 +0000 @@ -292,6 +292,9 @@ #ifdef TARGET_ARCH_x86 # include "bytecodes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytecodes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytecodes_sparc.hpp" 
#endif --- old/src/share/vm/interpreter/cppInterpreter.hpp 2021-01-25 19:30:34.475822088 +0000 +++ new/src/share/vm/interpreter/cppInterpreter.hpp 2021-01-25 19:30:34.349820764 +0000 @@ -84,6 +84,9 @@ #ifdef TARGET_ARCH_x86 # include "cppInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "cppInterpreter_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "cppInterpreter_sparc.hpp" #endif --- old/src/share/vm/interpreter/cppInterpreterGenerator.hpp 2021-01-25 19:30:34.920826766 +0000 +++ new/src/share/vm/interpreter/cppInterpreterGenerator.hpp 2021-01-25 19:30:34.805825557 +0000 @@ -50,6 +50,9 @@ #ifdef TARGET_ARCH_x86 # include "cppInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "cppInterpreterGenerator_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "cppInterpreterGenerator_sparc.hpp" #endif --- old/src/share/vm/interpreter/interpreter.hpp 2021-01-25 19:30:35.372831516 +0000 +++ new/src/share/vm/interpreter/interpreter.hpp 2021-01-25 19:30:35.257830308 +0000 @@ -148,6 +148,9 @@ #ifdef TARGET_ARCH_x86 # include "interpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "interpreter_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "interpreter_sparc.hpp" #endif --- old/src/share/vm/interpreter/interpreterGenerator.hpp 2021-01-25 19:30:35.865836698 +0000 +++ new/src/share/vm/interpreter/interpreterGenerator.hpp 2021-01-25 19:30:35.729835269 +0000 @@ -44,6 +44,9 @@ #ifdef TARGET_ARCH_x86 # include "interpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "interpreterGenerator_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "interpreterGenerator_sparc.hpp" #endif --- old/src/share/vm/interpreter/interpreterRuntime.cpp 2021-01-25 19:30:36.329841575 +0000 +++ new/src/share/vm/interpreter/interpreterRuntime.cpp 2021-01-25 19:30:36.206840282 +0000 @@ -59,6 +59,9 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vm_version_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vm_version_sparc.hpp" #endif @@ -1286,7 +1289,7 @@ // preparing the same method will be sure to see non-null entry & mirror. 
IRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) if (src_address == dest_address) { return; --- old/src/share/vm/interpreter/interpreterRuntime.hpp 2021-01-25 19:30:36.786846378 +0000 +++ new/src/share/vm/interpreter/interpreterRuntime.hpp 2021-01-25 19:30:36.667845128 +0000 @@ -156,7 +156,7 @@ Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); #endif @@ -165,6 +165,9 @@ #ifdef TARGET_ARCH_x86 # include "interpreterRT_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "interpreterRT_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "interpreterRT_sparc.hpp" #endif --- old/src/share/vm/interpreter/templateInterpreter.hpp 2021-01-25 19:30:37.248851234 +0000 +++ new/src/share/vm/interpreter/templateInterpreter.hpp 2021-01-25 19:30:37.120849889 +0000 @@ -190,6 +190,9 @@ #ifdef TARGET_ARCH_x86 # include "templateInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "templateInterpreter_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "templateInterpreter_sparc.hpp" #endif --- old/src/share/vm/interpreter/templateInterpreterGenerator.hpp 2021-01-25 19:30:37.718856174 +0000 +++ new/src/share/vm/interpreter/templateInterpreterGenerator.hpp 2021-01-25 19:30:37.585854776 +0000 @@ -89,6 +89,9 @@ #ifdef TARGET_ARCH_x86 # include "templateInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "templateInterpreterGenerator_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "templateInterpreterGenerator_sparc.hpp" #endif --- old/src/share/vm/interpreter/templateTable.hpp 2021-01-25 19:30:38.178861009 +0000 +++ new/src/share/vm/interpreter/templateTable.hpp 2021-01-25 19:30:38.056859727 +0000 @@ -32,6 +32,8 @@ # include INTERP_MASM_MD_HPP #elif defined TARGET_ARCH_x86 # include "interp_masm_x86.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "interp_masm_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "interp_masm_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero @@ -357,6 +359,8 @@ # include "templateTable_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "templateTable_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "templateTable_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "templateTable_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/jfr/writers/jfrEncoders.hpp 2021-01-25 19:30:38.661866086 +0000 +++ new/src/share/vm/jfr/writers/jfrEncoders.hpp 2021-01-25 19:30:38.529864698 +0000 @@ -43,6 +43,9 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif // // The Encoding policy prescribes a template --- old/src/share/vm/memory/allocation.inline.hpp 2021-01-25 19:30:39.178871520 +0000 +++ new/src/share/vm/memory/allocation.inline.hpp 2021-01-25 19:30:39.050870174 +0000 @@ -37,8 +37,8 @@ #ifndef PRODUCT // Increments unsigned long value for statistics (not atomic on MP). 
inline void inc_stat_counter(volatile julong* dest, julong add_value) { -#if defined(SPARC) || defined(X86) - // Sparc and X86 have atomic jlong (8 bytes) instructions +#if defined(SPARC) || defined(X86) || defined(AARCH64) + // Sparc, X86 and AArch64 have atomic jlong (8 bytes) instructions julong value = Atomic::load((volatile jlong*)dest); value += add_value; Atomic::store((jlong)value, (volatile jlong*)dest); --- old/src/share/vm/memory/metaspace.cpp 2021-01-25 19:30:39.660876586 +0000 +++ new/src/share/vm/memory/metaspace.cpp 2021-01-25 19:30:39.538875303 +0000 @@ -3065,10 +3065,50 @@ // Don't use large pages for the class space. bool large_pages = false; +#ifndef AARCH64 ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), _reserve_alignment, large_pages, requested_addr, 0); +#else // AARCH64 + ReservedSpace metaspace_rs; + + // Our compressed klass pointers may fit nicely into the lower 32 + // bits. + if ((uint64_t)requested_addr + compressed_class_space_size() < 4*G) + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); + + if (! metaspace_rs.is_reserved()) { + // Try to align metaspace so that we can decode a compressed klass + // with a single MOVK instruction. We can do this iff the + // compressed class base is a multiple of 4G. + for (char *a = (char*)align_ptr_up(requested_addr, 4*G); + a < (char*)(1024*G); + a += 4*G) { + if (UseSharedSpaces + && ! can_use_cds_with_metaspace_addr(a, cds_base)) { + // We failed to find an aligned base that will reach. Fall + // back to using our requested addr. + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); + break; + } + metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + a, 0); + if (metaspace_rs.is_reserved()) + break; + } + } + +#endif // AARCH64 + if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS if (UseSharedSpaces) { --- old/src/share/vm/oops/constantPool.hpp 2021-01-25 19:30:40.164881883 +0000 +++ new/src/share/vm/oops/constantPool.hpp 2021-01-25 19:30:40.033880506 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/oops/oop.inline.hpp 2021-01-25 19:30:40.644886928 +0000 +++ new/src/share/vm/oops/oop.inline.hpp 2021-01-25 19:30:40.525885677 +0000 @@ -45,6 +45,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/opto/buildOopMap.cpp 2021-01-25 19:30:41.122891952 +0000 +++ new/src/share/vm/opto/buildOopMap.cpp 2021-01-25 19:30:40.998890649 +0000 @@ -35,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmreg_sparc.inline.hpp" #endif --- old/src/share/vm/opto/c2_globals.hpp 2021-01-25 19:30:41.608897060 +0000 +++ new/src/share/vm/opto/c2_globals.hpp 2021-01-25 19:30:41.486895778 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "c2_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c2_globals_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif --- old/src/share/vm/opto/c2compiler.cpp 2021-01-25 19:30:42.077901990 +0000 
+++ new/src/share/vm/opto/c2compiler.cpp 2021-01-25 19:30:41.961900770 +0000 @@ -31,6 +31,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/callnode.hpp 2021-01-25 19:30:42.542906877 +0000 +++ new/src/share/vm/opto/callnode.hpp 2021-01-25 19:30:42.411905500 +0000 @@ -896,6 +896,20 @@ // Convenience for initialization->maybe_set_complete(phase) bool maybe_set_complete(PhaseGVN* phase); + +#ifdef AARCH64 + // Return true if allocation doesn't escape thread, its escape state + // needs be noEscape or ArgEscape. InitializeNode._does_not_escape + // is true when its allocation's escape state is noEscape or + // ArgEscape. In case allocation's InitializeNode is NULL, check + // AlllocateNode._is_non_escaping flag. + // AlllocateNode._is_non_escaping is true when its escape state is + // noEscape. + bool does_not_escape_thread() { + InitializeNode* init = NULL; + return _is_non_escaping || (((init = initialization()) != NULL) && init->does_not_escape()); + } +#endif }; //------------------------------AllocateArray--------------------------------- --- old/src/share/vm/opto/compile.cpp 2021-01-25 19:30:43.018911880 +0000 +++ new/src/share/vm/opto/compile.cpp 2021-01-25 19:30:42.885910482 +0000 @@ -73,6 +73,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero @@ -2673,6 +2675,17 @@ LoadNode::is_immutable_value(n->in(MemNode::Address))), "raw memory operations should have control edge"); } + if (n->is_MemBar()) { + MemBarNode* mb = n->as_MemBar(); + if (mb->trailing_store() || mb->trailing_load_store()) { + assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair"); + Node* mem = mb->in(MemBarNode::Precedent); + assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) || + (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op"); + } else if (mb->leading()) { + assert(mb->trailing_membar()->leading_membar() == mb, "bad membar pair"); + } + } #endif // Count FPU ops and common calls, implements item (3) switch( nop ) { --- old/src/share/vm/opto/gcm.cpp 2021-01-25 19:30:43.555917524 +0000 +++ new/src/share/vm/opto/gcm.cpp 2021-01-25 19:30:43.427916179 +0000 @@ -41,6 +41,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/graphKit.cpp 2021-01-25 19:30:44.111923368 +0000 +++ new/src/share/vm/opto/graphKit.cpp 2021-01-25 19:30:43.984922033 +0000 @@ -3848,7 +3848,11 @@ // Smash zero into card if( !UseConcMarkSweepGC ) { +#if defined(AARCH64) + __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered); +#else __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::release); +#endif } else { // Specialized path for CM store barrier __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type); --- old/src/share/vm/opto/lcm.cpp 2021-01-25 19:30:44.605928560 
+0000 +++ new/src/share/vm/opto/lcm.cpp 2021-01-25 19:30:44.481927257 +0000 @@ -36,6 +36,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/library_call.cpp 2021-01-25 19:30:45.089933647 +0000 +++ new/src/share/vm/opto/library_call.cpp 2021-01-25 19:30:44.953932218 +0000 @@ -2712,6 +2712,9 @@ // and it is not possible to fully distinguish unintended nulls // from intended ones in this API. + Node* load = NULL; + Node* store = NULL; + Node* leading_membar = NULL; if (is_volatile) { // We need to emit leading and trailing CPU membars (see below) in // addition to memory membars when is_volatile. This is a little @@ -2722,10 +2725,10 @@ need_mem_bar = true; // For Stores, place a memory ordering barrier now. if (is_store) { - insert_mem_bar(Op_MemBarRelease); + leading_membar = insert_mem_bar(Op_MemBarRelease); } else { if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); + leading_membar = insert_mem_bar(Op_MemBarVolatile); } } } @@ -2742,7 +2745,7 @@ MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered; // To be valid, unsafe loads may depend on other conditions than // the one that guards them: pin the Load node - Node* p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched); + load = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched); // load value switch (type) { case T_BOOLEAN: @@ -2756,13 +2759,13 @@ break; case T_OBJECT: if (need_read_barrier) { - insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar)); + insert_pre_barrier(heap_base_oop, offset, load, !(is_volatile || need_mem_bar)); } break; case T_ADDRESS: // Cast to an int type. - p = _gvn.transform(new (C) CastP2XNode(NULL, p)); - p = ConvX2UL(p); + load = _gvn.transform(new (C) CastP2XNode(NULL, load)); + load = ConvX2UL(load); break; default: fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); @@ -2772,7 +2775,7 @@ // following nodes will have the control of the MemBarCPUOrder inserted at // the end of this method. So, pushing the load onto the stack at a later // point is fine. - set_result(p); + set_result(load); } else { // place effect of store into memory switch (type) { @@ -2788,18 +2791,20 @@ MemNode::MemOrd mo = is_volatile ? 
MemNode::release : MemNode::unordered; if (type == T_OBJECT ) { - (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); + store = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); } else { - (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched); + store = store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched); } } if (is_volatile) { if (!is_store) { - insert_mem_bar(Op_MemBarAcquire); + Node* mb = insert_mem_bar(Op_MemBarAcquire, load); + mb->as_MemBar()->set_trailing_load(); } else { if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); + Node* mb = insert_mem_bar(Op_MemBarVolatile, store); + MemBarNode::set_store_pair(leading_membar->as_MemBar(), mb->as_MemBar()); } } } @@ -2999,7 +3004,7 @@ // into actual barriers on most machines, but we still need rest of // compiler to respect ordering. - insert_mem_bar(Op_MemBarRelease); + Node* leading_membar = insert_mem_bar(Op_MemBarRelease); insert_mem_bar(Op_MemBarCPUOrder); // 4984716: MemBars must be inserted before this @@ -3098,6 +3103,8 @@ Node* proj = _gvn.transform(new (C) SCMemProjNode(load_store)); set_memory(proj, alias_idx); + Node* access = load_store; + if (type == T_OBJECT && kind == LS_xchg) { #ifdef _LP64 if (adr->bottom_type()->is_ptr_to_narrowoop()) { @@ -3117,7 +3124,8 @@ // Add the trailing membar surrounding the access insert_mem_bar(Op_MemBarCPUOrder); - insert_mem_bar(Op_MemBarAcquire); + Node* mb = insert_mem_bar(Op_MemBarAcquire, access); + MemBarNode::set_load_store_pair(leading_membar->as_MemBar(), mb->as_MemBar()); assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); set_result(load_store); @@ -6357,8 +6365,9 @@ type = Type::get_const_basic_type(bt); } + Node* leading_membar = NULL; if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_vol) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier + leading_membar = insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier } // Build the load. MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered; @@ -6368,7 +6377,8 @@ // another volatile read. if (is_vol) { // Memory barrier includes bogus read of value to force load BEFORE membar - insert_mem_bar(Op_MemBarAcquire, loadedField); + Node* mb = insert_mem_bar(Op_MemBarAcquire, loadedField); + mb->as_MemBar()->set_trailing_load(); } return loadedField; } --- old/src/share/vm/opto/locknode.hpp 2021-01-25 19:30:45.637939407 +0000 +++ new/src/share/vm/opto/locknode.hpp 2021-01-25 19:30:45.516938135 +0000 @@ -34,6 +34,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/macro.cpp 2021-01-25 19:30:46.107944347 +0000 +++ new/src/share/vm/opto/macro.cpp 2021-01-25 19:30:45.974942949 +0000 @@ -1385,7 +1385,12 @@ // MemBarStoreStore so that stores that initialize this object // can't be reordered with a subsequent store that makes this // object accessible by other threads. 
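
The library_call.cpp changes above tag every volatile access with a leading/trailing membar pair (set_store_pair, set_load_store_pair, set_trailing_load) instead of emitting anonymous barriers. As a rough standalone analogy for what such a pair enforces (plain C++11 atomics standing in for the C2 nodes; not HotSpot code, and all names below are illustrative): the leading release orders earlier accesses before the store, while the trailing full fence supplies the StoreLoad ordering a Java volatile store needs. A backend that implements volatile stores with a single stlr can use the recorded pairing to recognize both halves and drop fences that the instruction already covers.

#include <atomic>

std::atomic<int> guard{0};
int payload = 0;

void volatile_style_store(int v) {
    payload = v;                                          // ordinary stores ...
    guard.store(1, std::memory_order_release);            // ... ordered by the "leading" release
    std::atomic_thread_fence(std::memory_order_seq_cst);  // "trailing" full (StoreLoad) barrier
}

int volatile_style_load() {
    int g = guard.load(std::memory_order_acquire);        // load plus "trailing" acquire
    return g ? payload : -1;                              // payload is visible once guard is seen
}
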
+#ifndef AARCH64 if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) { +#else + if (!alloc->does_not_escape_thread() && + (init == NULL || !init->is_complete_with_arraycopy())) { +#endif if (init == NULL || init->req() < InitializeNode::RawStores) { // No InitializeNode or no stores captured by zeroing // elimination. Simply add the MemBarStoreStore after object --- old/src/share/vm/opto/matcher.cpp 2021-01-25 19:30:46.597949497 +0000 +++ new/src/share/vm/opto/matcher.cpp 2021-01-25 19:30:46.471948173 +0000 @@ -44,6 +44,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/memnode.cpp 2021-01-25 19:30:47.098954763 +0000 +++ new/src/share/vm/opto/memnode.cpp 2021-01-25 19:30:46.972953438 +0000 @@ -2551,12 +2551,14 @@ Node* adr = in(MemNode::Address); Node* val = in(MemNode::ValueIn); + Node* result = this; + // Load then Store? Then the Store is useless if (val->is_Load() && val->in(MemNode::Address)->eqv_uncast(adr) && val->in(MemNode::Memory )->eqv_uncast(mem) && val->as_Load()->store_Opcode() == Opcode()) { - return mem; + result = mem; } // Two stores in a row of the same value? @@ -2564,32 +2566,47 @@ mem->in(MemNode::Address)->eqv_uncast(adr) && mem->in(MemNode::ValueIn)->eqv_uncast(val) && mem->Opcode() == Opcode()) { - return mem; + result = mem; } // Store of zero anywhere into a freshly-allocated object? // Then the store is useless. // (It must already have been captured by the InitializeNode.) - if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) { + if (result == this && + ReduceFieldZeroing && phase->type(val)->is_zero_type()) { // a newly allocated object is already all-zeroes everywhere if (mem->is_Proj() && mem->in(0)->is_Allocate()) { - return mem; + result = mem; } - // the store may also apply to zero-bits in an earlier object - Node* prev_mem = find_previous_store(phase); - // Steps (a), (b): Walk past independent stores to find an exact match. - if (prev_mem != NULL) { - Node* prev_val = can_see_stored_value(prev_mem, phase); - if (prev_val != NULL && phase->eqv(prev_val, val)) { - // prev_val and val might differ by a cast; it would be good - // to keep the more informative of the two. - return mem; + if (result == this) { + // the store may also apply to zero-bits in an earlier object + Node* prev_mem = find_previous_store(phase); + // Steps (a), (b): Walk past independent stores to find an exact match. + if (prev_mem != NULL) { + Node* prev_val = can_see_stored_value(prev_mem, phase); + if (prev_val != NULL && phase->eqv(prev_val, val)) { + // prev_val and val might differ by a cast; it would be good + // to keep the more informative of the two. 
+ result = mem; + } } } } - return this; + if (result != this && phase->is_IterGVN() != NULL) { + MemBarNode* trailing = trailing_membar(); + if (trailing != NULL) { +#ifdef ASSERT + const TypeOopPtr* t_oop = phase->type(in(Address))->isa_oopptr(); + assert(t_oop == NULL || t_oop->is_known_instance_field(), "only for non escaping objects"); +#endif + PhaseIterGVN* igvn = phase->is_IterGVN(); + trailing->remove(igvn); + } + } + + return result; } //------------------------------match_edge------------------------------------- @@ -2668,6 +2685,32 @@ return true; } +MemBarNode* StoreNode::trailing_membar() const { + if (is_release()) { + MemBarNode* trailing_mb = NULL; + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* u = fast_out(i); + if (u->is_MemBar()) { + if (u->as_MemBar()->trailing_store()) { + assert(u->Opcode() == Op_MemBarVolatile, ""); + assert(trailing_mb == NULL, "only one"); + trailing_mb = u->as_MemBar(); +#ifdef ASSERT + Node* leading = u->as_MemBar()->leading_membar(); + assert(leading->Opcode() == Op_MemBarRelease, "incorrect membar"); + assert(leading->as_MemBar()->leading_store(), "incorrect membar pair"); + assert(leading->as_MemBar()->trailing_membar() == u, "incorrect membar pair"); +#endif + } else { + assert(u->as_MemBar()->standalone(), ""); + } + } + } + return trailing_mb; + } + return NULL; +} + //============================================================================= //------------------------------Ideal------------------------------------------ // If the store is from an AND mask that leaves the low bits untouched, then @@ -2780,6 +2823,30 @@ return true; } +MemBarNode* LoadStoreNode::trailing_membar() const { + MemBarNode* trailing = NULL; + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* u = fast_out(i); + if (u->is_MemBar()) { + if (u->as_MemBar()->trailing_load_store()) { + assert(u->Opcode() == Op_MemBarAcquire, ""); + assert(trailing == NULL, "only one"); + trailing = u->as_MemBar(); +#ifdef ASSERT + Node* leading = trailing->leading_membar(); + assert(support_IRIW_for_not_multiple_copy_atomic_cpu || leading->Opcode() == Op_MemBarRelease, "incorrect membar"); + assert(leading->as_MemBar()->leading_load_store(), "incorrect membar pair"); + assert(leading->as_MemBar()->trailing_membar() == trailing, "incorrect membar pair"); +#endif + } else { + assert(u->as_MemBar()->standalone(), "wrong barrier kind"); + } + } + } + + return trailing; +} + uint LoadStoreNode::size_of() const { return sizeof(*this); } //============================================================================= @@ -3014,7 +3081,10 @@ //============================================================================= MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) : MultiNode(TypeFunc::Parms + (precedent == NULL? 
0: 1)), - _adr_type(C->get_adr_type(alias_idx)) + _adr_type(C->get_adr_type(alias_idx)), _kind(Standalone) +#ifdef ASSERT + , _pair_idx(0) +#endif { init_class_id(Class_MemBar); Node* top = C->top(); @@ -3048,6 +3118,21 @@ } } +void MemBarNode::remove(PhaseIterGVN *igvn) { + if (outcnt() != 2) { + return; + } + if (trailing_store() || trailing_load_store()) { + MemBarNode* leading = leading_membar(); + if (leading != NULL) { + assert(leading->trailing_membar() == this, "inconsistent leading/trailing membars"); + leading->remove(igvn); + } + } + igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory)); + igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control)); +} + //------------------------------Ideal------------------------------------------ // Return a node which is more "ideal" than the current node. Strip out // control copies @@ -3094,7 +3179,9 @@ // Final field stores. Node* alloc = AllocateNode::Ideal_allocation(in(MemBarNode::Precedent), phase); if ((alloc != NULL) && alloc->is_Allocate() && - alloc->as_Allocate()->_is_non_escaping) { + AARCH64_ONLY ( alloc->as_Allocate()->does_not_escape_thread() ) + NOT_AARCH64 ( alloc->as_Allocate()->_is_non_escaping ) + ) { // The allocated object does not escape. eliminate = true; } @@ -3102,8 +3189,7 @@ if (eliminate) { // Replace MemBar projections by its inputs. PhaseIterGVN* igvn = phase->is_IterGVN(); - igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory)); - igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control)); + remove(igvn); // Must return either the original node (now dead) or a new node // (Do not return a top here, since that would break the uniqueness of top.) return new (phase->C) ConINode(TypeInt::ZERO); @@ -3132,6 +3218,137 @@ return NULL; } +void MemBarNode::set_store_pair(MemBarNode* leading, MemBarNode* trailing) { + trailing->_kind = TrailingStore; + leading->_kind = LeadingStore; +#ifdef ASSERT + trailing->_pair_idx = leading->_idx; + leading->_pair_idx = leading->_idx; +#endif +} + +void MemBarNode::set_load_store_pair(MemBarNode* leading, MemBarNode* trailing) { + trailing->_kind = TrailingLoadStore; + leading->_kind = LeadingLoadStore; +#ifdef ASSERT + trailing->_pair_idx = leading->_idx; + leading->_pair_idx = leading->_idx; +#endif +} + +MemBarNode* MemBarNode::trailing_membar() const { + ResourceMark rm; + Node* trailing = (Node*)this; + VectorSet seen(Thread::current()->resource_area()); + + Node_Stack multis(0); + do { + Node* c = trailing; + uint i = 0; + do { + trailing = NULL; + for (; i < c->outcnt(); i++) { + Node* next = c->raw_out(i); + if (next != c && next->is_CFG()) { + if (c->is_MultiBranch()) { + if (multis.node() == c) { + multis.set_index(i+1); + } else { + multis.push(c, i+1); + } + } + trailing = next; + break; + } + } + if (trailing != NULL && !seen.test_set(trailing->_idx)) { + break; + } + while (multis.size() > 0) { + c = multis.node(); + i = multis.index(); + if (i < c->req()) { + break; + } + multis.pop(); + } + } while (multis.size() > 0); + } while (!trailing->is_MemBar() || !trailing->as_MemBar()->trailing()); + + MemBarNode* mb = trailing->as_MemBar(); + assert((mb->_kind == TrailingStore && _kind == LeadingStore) || + (mb->_kind == TrailingLoadStore && _kind == LeadingLoadStore), "bad trailing membar"); + assert(mb->_pair_idx == _pair_idx, "bad trailing membar"); + return mb; +} + +MemBarNode* MemBarNode::leading_membar() const { + ResourceMark rm; + VectorSet seen(Thread::current()->resource_area()); + Node_Stack regions(0); + 
Node* leading = in(0); + while (leading != NULL && (!leading->is_MemBar() || !leading->as_MemBar()->leading())) { + while (leading == NULL || leading->is_top() || seen.test_set(leading->_idx)) { + leading = NULL; + while (regions.size() > 0 && leading == NULL) { + Node* r = regions.node(); + uint i = regions.index(); + if (i < r->req()) { + leading = r->in(i); + regions.set_index(i+1); + } else { + regions.pop(); + } + } + if (leading == NULL) { + assert(regions.size() == 0, "all paths should have been tried"); + return NULL; + } + } + if (leading->is_Region()) { + regions.push(leading, 2); + leading = leading->in(1); + } else { + leading = leading->in(0); + } + } +#ifdef ASSERT + Unique_Node_List wq; + wq.push((Node*)this); + uint found = 0; + for (uint i = 0; i < wq.size(); i++) { + Node* n = wq.at(i); + if (n->is_Region()) { + for (uint j = 1; j < n->req(); j++) { + Node* in = n->in(j); + if (in != NULL && !in->is_top()) { + wq.push(in); + } + } + } else { + if (n->is_MemBar() && n->as_MemBar()->leading()) { + assert(n == leading, "consistency check failed"); + found++; + } else { + Node* in = n->in(0); + if (in != NULL && !in->is_top()) { + wq.push(in); + } + } + } + } + assert(found == 1 || (found == 0 && leading == NULL), "consistency check failed"); +#endif + if (leading == NULL) { + return NULL; + } + MemBarNode* mb = leading->as_MemBar(); + assert((mb->_kind == LeadingStore && _kind == TrailingStore) || + (mb->_kind == LeadingLoadStore && _kind == TrailingLoadStore), "bad leading membar"); + assert(mb->_pair_idx == _pair_idx, "bad leading membar"); + return mb; +} + //===========================InitializeNode==================================== // SUMMARY: // This node acts as a memory barrier on raw memory, after some raw stores. --- old/src/share/vm/opto/memnode.hpp 2021-01-25 19:30:47.603960071 +0000 +++ new/src/share/vm/opto/memnode.hpp 2021-01-25 19:30:47.485958830 +0000 @@ -586,6 +586,8 @@ // have all possible loads of the value stored been optimized away? bool value_never_loaded(PhaseTransform *phase) const; + + MemBarNode* trailing_membar() const; }; //------------------------------StoreBNode------------------------------------- @@ -791,6 +793,7 @@ virtual const class TypePtr *adr_type() const { return _adr_type; } // returns bottom_type of address bool result_not_used() const; + MemBarNode* trailing_membar() const; }; class LoadStoreConditionalNode : public LoadStoreNode { @@ -1044,6 +1047,20 @@ // Memory type this node is serializing. Usually either rawptr or bottom. const TypePtr* _adr_type; + // How is this membar related to a nearby memory access? 
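
The _kind classification introduced just below is what the trailing_membar()/leading_membar() helpers above rely on. Stripped of the HotSpot node types, the Node_Stack bookkeeping and the debug-only consistency checks, the upward search in leading_membar() has roughly the following shape (standalone sketch with illustrative names, not HotSpot code):

#include <unordered_set>
#include <vector>

struct CtrlNode {
    bool is_leading = false;          // stands in for MemBarNode::leading()
    std::vector<CtrlNode*> preds;     // control inputs; a merge point has several
};

// Walk control predecessors until a node flagged as a leading membar is found.
// A visited set copes with loops; merge points fan the search out over all of
// their predecessors, just as the Region handling above does.
CtrlNode* find_leading(CtrlNode* start) {
    std::unordered_set<CtrlNode*> seen;
    std::vector<CtrlNode*> worklist(start->preds);
    while (!worklist.empty()) {
        CtrlNode* n = worklist.back();
        worklist.pop_back();
        if (n == nullptr || !seen.insert(n).second) continue;  // dead path or already visited
        if (n->is_leading) return n;                           // found the paired membar
        worklist.insert(worklist.end(), n->preds.begin(), n->preds.end());
    }
    return nullptr;  // no leading membar on any path (the real code cross-checks this under ASSERT)
}
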
+ enum { + Standalone, + TrailingLoad, + TrailingStore, + LeadingStore, + TrailingLoadStore, + LeadingLoadStore + } _kind; + +#ifdef ASSERT + uint _pair_idx; +#endif + public: enum { Precedent = TypeFunc::Parms // optional edge to force precedence @@ -1061,6 +1078,24 @@ static MemBarNode* make(Compile* C, int opcode, int alias_idx = Compile::AliasIdxBot, Node* precedent = NULL); + + MemBarNode* trailing_membar() const; + MemBarNode* leading_membar() const; + + void set_trailing_load() { _kind = TrailingLoad; } + bool trailing_load() const { return _kind == TrailingLoad; } + bool trailing_store() const { return _kind == TrailingStore; } + bool leading_store() const { return _kind == LeadingStore; } + bool trailing_load_store() const { return _kind == TrailingLoadStore; } + bool leading_load_store() const { return _kind == LeadingLoadStore; } + bool trailing() const { return _kind == TrailingLoad || _kind == TrailingStore || _kind == TrailingLoadStore; } + bool leading() const { return _kind == LeadingStore || _kind == LeadingLoadStore; } + bool standalone() const { return _kind == Standalone; } + + static void set_store_pair(MemBarNode* leading, MemBarNode* trailing); + static void set_load_store_pair(MemBarNode* leading, MemBarNode* trailing); + + void remove(PhaseIterGVN *igvn); }; // "Acquire" - no following ref can move before (but earlier refs can --- old/src/share/vm/opto/output.hpp 2021-01-25 19:30:48.088965168 +0000 +++ new/src/share/vm/opto/output.hpp 2021-01-25 19:30:47.959963812 +0000 @@ -33,6 +33,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/parse3.cpp 2021-01-25 19:30:48.552970045 +0000 +++ new/src/share/vm/opto/parse3.cpp 2021-01-25 19:30:48.436968826 +0000 @@ -196,6 +196,7 @@ } } + Node* leading_membar = NULL; ciType* field_klass = field->type(); bool is_vol = field->is_volatile(); @@ -228,7 +229,7 @@ type = Type::get_const_basic_type(bt); } if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier + leading_membar = insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier } // Build the load. // @@ -272,16 +273,21 @@ // another volatile read. if (field->is_volatile()) { // Memory barrier includes bogus read of value to force load BEFORE membar - insert_mem_bar(Op_MemBarAcquire, ld); + assert(leading_membar == NULL || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected"); + Node* mb = insert_mem_bar(Op_MemBarAcquire, ld); + mb->as_MemBar()->set_trailing_load(); } } void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) { + Node* leading_membar = NULL; bool is_vol = field->is_volatile(); // If reference is volatile, prevent following memory ops from // floating down past the volatile write. Also prevents commoning // another volatile read. - if (is_vol) insert_mem_bar(Op_MemBarRelease); + if (is_vol) { + leading_membar = insert_mem_bar(Op_MemBarRelease); + } // Compute address and memory type. int offset = field->offset_in_bytes(); @@ -322,7 +328,8 @@ if (is_vol) { // If not multiple copy atomic, we do the MemBarVolatile before the load. 
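
support_IRIW_for_not_multiple_copy_atomic_cpu, tested here and in the library_call.cpp hunks above, refers to the "independent reads of independent writes" litmus test: on hardware that is not multiple-copy atomic (for example PPC64), two readers can disagree about the order of two unrelated volatile writes unless a full barrier is issued before every volatile load. AArch64 is multiple-copy atomic, so it keeps the default path in which the full barrier trails the volatile store. A standalone sketch of the litmus shape (plain C++11 atomics, illustrative only; a single run proves nothing, the point is the forbidden outcome):

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> x{0}, y{0};

int main() {
    int r1 = 0, r2 = 0, r3 = 0, r4 = 0;
    std::thread w1([&] { x.store(1, std::memory_order_seq_cst); });
    std::thread w2([&] { y.store(1, std::memory_order_seq_cst); });
    std::thread ra([&] { r1 = x.load(std::memory_order_seq_cst);
                         r2 = y.load(std::memory_order_seq_cst); });
    std::thread rb([&] { r3 = y.load(std::memory_order_seq_cst);
                         r4 = x.load(std::memory_order_seq_cst); });
    w1.join(); w2.join(); ra.join(); rb.join();
    // Sequential consistency forbids r1==1 && r2==0 && r3==1 && r4==0.
    // With acquire/release only, a non-multiple-copy-atomic CPU may allow it,
    // which is why such targets emit MemBarVolatile before each volatile load.
    std::printf("r1=%d r2=%d r3=%d r4=%d\n", r1, r2, r3, r4);
    return 0;
}
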
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); // Use fat membar + Node* mb = insert_mem_bar(Op_MemBarVolatile, store); // Use fat membar + MemBarNode::set_store_pair(leading_membar->as_MemBar(), mb->as_MemBar()); } // Remember we wrote a volatile field. // For not multiple copy atomic cpu (ppc64) a barrier should be issued --- old/src/share/vm/opto/phaseX.hpp 2021-01-25 19:30:49.037975143 +0000 +++ new/src/share/vm/opto/phaseX.hpp 2021-01-25 19:30:48.913973839 +0000 @@ -327,6 +327,8 @@ const Type* limit_type) const { ShouldNotCallThis(); return NULL; } + virtual PhaseIterGVN *is_IterGVN() { return 0; } + #ifndef PRODUCT void dump_old2new_map() const; void dump_new( uint new_lidx ) const; --- old/src/share/vm/opto/regmask.cpp 2021-01-25 19:30:49.496979967 +0000 +++ new/src/share/vm/opto/regmask.cpp 2021-01-25 19:30:49.366978601 +0000 @@ -31,6 +31,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/regmask.hpp 2021-01-25 19:30:49.988985138 +0000 +++ new/src/share/vm/opto/regmask.hpp 2021-01-25 19:30:49.866983856 +0000 @@ -34,6 +34,8 @@ # include "adfiles/adGlobals_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/adGlobals_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/adGlobals_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/adGlobals_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/opto/runtime.cpp 2021-01-25 19:30:50.476990267 +0000 +++ new/src/share/vm/opto/runtime.cpp 2021-01-25 19:30:50.348988922 +0000 @@ -74,6 +74,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/prims/jniCheck.cpp 2021-01-25 19:30:50.939995134 +0000 +++ new/src/share/vm/prims/jniCheck.cpp 2021-01-25 19:30:50.821993893 +0000 @@ -40,6 +40,9 @@ #ifdef TARGET_ARCH_x86 # include "jniTypes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "jniTypes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "jniTypes_sparc.hpp" #endif --- old/src/share/vm/prims/jni_md.h 2021-01-25 19:30:51.416000137 +0000 +++ new/src/share/vm/prims/jni_md.h 2021-01-25 19:30:51.298998907 +0000 @@ -27,6 +27,9 @@ #ifdef TARGET_ARCH_x86 # include "jni_x86.h" #endif +#ifdef TARGET_ARCH_aarch64 +# include "jni_aarch64.h" +#endif #ifdef TARGET_ARCH_sparc # include "jni_sparc.h" #endif --- old/src/share/vm/prims/jvmtiClassFileReconstituter.cpp 2021-01-25 19:30:51.906005287 +0000 +++ new/src/share/vm/prims/jvmtiClassFileReconstituter.cpp 2021-01-25 19:30:51.784004004 +0000 @@ -31,6 +31,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/prims/jvmtiExport.cpp 2021-01-25 19:30:52.372010185 +0000 +++ new/src/share/vm/prims/jvmtiExport.cpp 2021-01-25 19:30:52.250008902 +0000 @@ -1239,7 +1239,14 @@ } } +#ifdef AARCH64 + // FIXME: this is just a kludge to get JVMTI going. 
Compiled + // MethodHandle code doesn't call the JVMTI notify routines, so the + // stack depth we see here is wrong. + state->invalidate_cur_stack_depth(); +#else state->decr_cur_stack_depth(); +#endif } --- old/src/share/vm/prims/methodHandles.hpp 2021-01-25 19:30:52.866015377 +0000 +++ new/src/share/vm/prims/methodHandles.hpp 2021-01-25 19:30:52.748014137 +0000 @@ -182,6 +182,9 @@ #ifdef TARGET_ARCH_x86 # include "methodHandles_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "methodHandles_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "methodHandles_sparc.hpp" #endif --- old/src/share/vm/runtime/advancedThresholdPolicy.cpp 2021-01-25 19:30:53.330020254 +0000 +++ new/src/share/vm/runtime/advancedThresholdPolicy.cpp 2021-01-25 19:30:53.204018929 +0000 @@ -57,7 +57,7 @@ FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count()); // Some inlining tuning -#ifdef X86 +#if defined(X86) || defined(AARCH64) if (FLAG_IS_DEFAULT(InlineSmallCode)) { FLAG_SET_DEFAULT(InlineSmallCode, 2000); } --- old/src/share/vm/runtime/arguments.cpp 2021-01-25 19:30:53.883026066 +0000 +++ new/src/share/vm/runtime/arguments.cpp 2021-01-25 19:30:53.720024353 +0000 @@ -1161,7 +1161,12 @@ } // Increase the code cache size - tiered compiles a lot more. if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) { +#ifndef AARCH64 FLAG_SET_DEFAULT(ReservedCodeCacheSize, ReservedCodeCacheSize * 5); +#else + FLAG_SET_DEFAULT(ReservedCodeCacheSize, + MIN2(CODE_CACHE_DEFAULT_LIMIT, ReservedCodeCacheSize * 5)); +#endif } if (!UseInterpreter) { // -Xcomp Tier3InvokeNotifyFreqLog = 0; --- old/src/share/vm/runtime/atomic.inline.hpp 2021-01-25 19:30:54.378031269 +0000 +++ new/src/share/vm/runtime/atomic.inline.hpp 2021-01-25 19:30:54.258030007 +0000 @@ -43,6 +43,9 @@ #ifdef TARGET_OS_ARCH_linux_ppc # include "atomic_linux_ppc.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "atomic_linux_aarch64.inline.hpp" +#endif // Solaris #ifdef TARGET_OS_ARCH_solaris_x86 --- old/src/share/vm/runtime/deoptimization.cpp 2021-01-25 19:30:54.861036345 +0000 +++ new/src/share/vm/runtime/deoptimization.cpp 2021-01-25 19:30:54.734035010 +0000 @@ -53,6 +53,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmreg_sparc.inline.hpp" #endif @@ -72,6 +75,8 @@ # include "adfiles/ad_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/ad_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/ad_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/ad_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/runtime/dtraceJSDT.hpp 2021-01-25 19:30:55.364041632 +0000 +++ new/src/share/vm/runtime/dtraceJSDT.hpp 2021-01-25 19:30:55.235040276 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "nativeInst_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" #endif --- old/src/share/vm/runtime/frame.cpp 2021-01-25 19:30:55.848046719 +0000 +++ new/src/share/vm/runtime/frame.cpp 2021-01-25 19:30:55.727045447 +0000 @@ -49,6 +49,9 @@ #ifdef TARGET_ARCH_x86 # include "nativeInst_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" #endif --- old/src/share/vm/runtime/frame.hpp 2021-01-25 19:30:56.356052058 +0000 +++ new/src/share/vm/runtime/frame.hpp 2021-01-25 
19:30:56.220050629 +0000 @@ -37,6 +37,8 @@ # include "adfiles/adGlobals_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/adGlobals_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/adGlobals_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/adGlobals_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero @@ -487,6 +489,9 @@ #ifdef TARGET_ARCH_x86 # include "frame_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "frame_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "frame_sparc.hpp" #endif --- old/src/share/vm/runtime/frame.inline.hpp 2021-01-25 19:30:56.812056851 +0000 +++ new/src/share/vm/runtime/frame.inline.hpp 2021-01-25 19:30:56.690055569 +0000 @@ -34,6 +34,9 @@ #ifdef TARGET_ARCH_x86 # include "jniTypes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "jniTypes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "jniTypes_sparc.hpp" #endif @@ -97,6 +100,9 @@ #ifdef TARGET_ARCH_x86 # include "frame_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "frame_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "frame_sparc.inline.hpp" #endif --- old/src/share/vm/runtime/globals.hpp 2021-01-25 19:30:57.268061644 +0000 +++ new/src/share/vm/runtime/globals.hpp 2021-01-25 19:30:57.146060362 +0000 @@ -40,6 +40,9 @@ #ifdef TARGET_ARCH_x86 # include "globals_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "globals_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "globals_sparc.hpp" #endif @@ -70,6 +73,9 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "globals_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "globals_linux_aarch64.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "globals_linux_sparc.hpp" #endif @@ -104,6 +110,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c1_globals_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c1_globals_sparc.hpp" #endif @@ -133,6 +142,9 @@ #ifdef TARGET_ARCH_x86 # include "c2_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "c2_globals_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif @@ -3864,7 +3876,7 @@ product(uintx, SharedMiscDataSize, NOT_LP64(2*M) LP64_ONLY(4*M), \ "Size of the shared miscellaneous data area (in bytes)") \ \ - product(uintx, SharedMiscCodeSize, 120*K, \ + product(uintx, SharedMiscCodeSize, AARCH64_ONLY(192*K) NOT_AARCH64(120*K), \ "Size of the shared miscellaneous code area (in bytes)") \ \ product(uintx, SharedBaseAddress, LP64_ONLY(32*G) \ --- old/src/share/vm/runtime/icache.hpp 2021-01-25 19:30:57.786067088 +0000 +++ new/src/share/vm/runtime/icache.hpp 2021-01-25 19:30:57.668065848 +0000 @@ -71,6 +71,9 @@ #ifdef TARGET_ARCH_x86 # include "icache_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "icache_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "icache_sparc.hpp" #endif --- old/src/share/vm/runtime/java.cpp 2021-01-25 19:30:58.245071913 +0000 +++ new/src/share/vm/runtime/java.cpp 2021-01-25 19:30:58.116070557 +0000 @@ -69,6 +69,9 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vm_version_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vm_version_sparc.hpp" #endif --- old/src/share/vm/runtime/javaCalls.hpp 2021-01-25 19:30:58.712076821 +0000 +++ new/src/share/vm/runtime/javaCalls.hpp 2021-01-25 19:30:58.586075497 +0000 @@ -34,6 +34,9 @@ #ifdef TARGET_ARCH_x86 # include "jniTypes_x86.hpp" #endif 
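
Most of the hunks in this stretch only add the aarch64 variant alongside each existing per-architecture include; the one behavioral change is the SharedMiscCodeSize default above, expressed with the AARCH64_ONLY/NOT_AARCH64 pair. A minimal standalone sketch of how that pair composes (the authoritative definitions are the macros.hpp hunk further down; 1024 is written out here instead of HotSpot's K constant, and the variable name is illustrative):

#include <cstddef>

#ifdef AARCH64
#  define AARCH64_ONLY(code) code
#  define NOT_AARCH64(code)
#else
#  define AARCH64_ONLY(code)
#  define NOT_AARCH64(code) code
#endif

// Exactly one expansion survives preprocessing, so a per-target default needs
// no #ifdef at the use site, as in the SharedMiscCodeSize change above:
static const std::size_t kSharedMiscCodeSize =
    AARCH64_ONLY(192 * 1024) NOT_AARCH64(120 * 1024);
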
+#ifdef TARGET_ARCH_aarch64 +# include "jniTypes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "jniTypes_sparc.hpp" #endif --- old/src/share/vm/runtime/javaFrameAnchor.hpp 2021-01-25 19:30:59.187081814 +0000 +++ new/src/share/vm/runtime/javaFrameAnchor.hpp 2021-01-25 19:30:59.063080510 +0000 @@ -80,6 +80,9 @@ #ifdef TARGET_ARCH_x86 # include "javaFrameAnchor_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "javaFrameAnchor_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "javaFrameAnchor_sparc.hpp" #endif --- old/src/share/vm/runtime/orderAccess.inline.hpp 2021-01-25 19:30:59.644086617 +0000 +++ new/src/share/vm/runtime/orderAccess.inline.hpp 2021-01-25 19:30:59.524085356 +0000 @@ -41,6 +41,9 @@ #ifdef TARGET_OS_ARCH_linux_arm # include "orderAccess_linux_arm.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "orderAccess_linux_aarch64.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_ppc # include "orderAccess_linux_ppc.inline.hpp" #endif --- old/src/share/vm/runtime/os.hpp 2021-01-25 19:31:00.128091704 +0000 +++ new/src/share/vm/runtime/os.hpp 2021-01-25 19:30:59.994090296 +0000 @@ -857,6 +857,9 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "os_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "os_linux_aarch64.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "os_linux_sparc.hpp" #endif --- old/src/share/vm/runtime/prefetch.inline.hpp 2021-01-25 19:31:00.579096444 +0000 +++ new/src/share/vm/runtime/prefetch.inline.hpp 2021-01-25 19:31:00.459095183 +0000 @@ -40,6 +40,9 @@ #ifdef TARGET_OS_ARCH_linux_arm # include "prefetch_linux_arm.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "prefetch_linux_aarch64.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_ppc # include "prefetch_linux_ppc.inline.hpp" #endif --- old/src/share/vm/runtime/registerMap.hpp 2021-01-25 19:31:01.045101342 +0000 +++ new/src/share/vm/runtime/registerMap.hpp 2021-01-25 19:31:00.919100018 +0000 @@ -30,6 +30,9 @@ #ifdef TARGET_ARCH_x86 # include "register_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "register_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "register_sparc.hpp" #endif @@ -138,6 +141,9 @@ #ifdef TARGET_ARCH_x86 # include "registerMap_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "registerMap_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "registerMap_sparc.hpp" #endif --- old/src/share/vm/runtime/relocator.hpp 2021-01-25 19:31:01.503106156 +0000 +++ new/src/share/vm/runtime/relocator.hpp 2021-01-25 19:31:01.382104884 +0000 @@ -30,6 +30,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "bytes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif --- old/src/share/vm/runtime/safepoint.cpp 2021-01-25 19:31:01.975111117 +0000 +++ new/src/share/vm/runtime/safepoint.cpp 2021-01-25 19:31:01.852109824 +0000 @@ -58,6 +58,10 @@ # include "nativeInst_x86.hpp" # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +# include "vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" # include "vmreg_sparc.inline.hpp" --- old/src/share/vm/runtime/sharedRuntime.cpp 2021-01-25 19:31:02.427115868 +0000 +++ new/src/share/vm/runtime/sharedRuntime.cpp 2021-01-25 19:31:02.307114607 +0000 @@ -62,6 +62,10 @@ # include "nativeInst_x86.hpp" # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +# include 
"vmreg_aarch64.inline.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" # include "vmreg_sparc.inline.hpp" --- old/src/share/vm/runtime/stackValueCollection.cpp 2021-01-25 19:31:02.938121239 +0000 +++ new/src/share/vm/runtime/stackValueCollection.cpp 2021-01-25 19:31:02.805119841 +0000 @@ -27,6 +27,9 @@ #ifdef TARGET_ARCH_x86 # include "jniTypes_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "jniTypes_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "jniTypes_sparc.hpp" #endif --- old/src/share/vm/runtime/statSampler.cpp 2021-01-25 19:31:03.409126189 +0000 +++ new/src/share/vm/runtime/statSampler.cpp 2021-01-25 19:31:03.284124875 +0000 @@ -36,6 +36,9 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vm_version_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vm_version_sparc.hpp" #endif --- old/src/share/vm/runtime/stubRoutines.hpp 2021-01-25 19:31:03.899131339 +0000 +++ new/src/share/vm/runtime/stubRoutines.hpp 2021-01-25 19:31:03.770129984 +0000 @@ -34,6 +34,9 @@ #ifdef TARGET_ARCH_x86 # include "nativeInst_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "nativeInst_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "nativeInst_sparc.hpp" #endif @@ -105,6 +108,8 @@ # include "stubRoutines_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "stubRoutines_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "stubRoutines_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "stubRoutines_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/runtime/thread.hpp 2021-01-25 19:31:04.352136101 +0000 +++ new/src/share/vm/runtime/thread.hpp 2021-01-25 19:31:04.232134839 +0000 @@ -1043,7 +1043,7 @@ address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support -#ifndef PPC64 +#if !(defined(PPC64) || defined(AARCH64)) JavaThreadState thread_state() const { return _thread_state; } void set_thread_state(JavaThreadState s) { _thread_state = s; } #else @@ -1701,6 +1701,9 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "thread_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "thread_linux_aarch64.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "thread_linux_sparc.hpp" #endif --- old/src/share/vm/runtime/thread.inline.hpp 2021-01-25 19:31:04.835141177 +0000 +++ new/src/share/vm/runtime/thread.inline.hpp 2021-01-25 19:31:04.705139811 +0000 @@ -59,7 +59,7 @@ return allocated_bytes; } -#ifdef PPC64 +#if defined(PPC64) || defined (AARCH64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } --- old/src/share/vm/runtime/threadLocalStorage.hpp 2021-01-25 19:31:05.300146065 +0000 +++ new/src/share/vm/runtime/threadLocalStorage.hpp 2021-01-25 19:31:05.177144772 +0000 @@ -51,6 +51,9 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "threadLS_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "threadLS_linux_aarch64.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "threadLS_linux_sparc.hpp" #endif --- old/src/share/vm/runtime/vframeArray.cpp 2021-01-25 19:31:05.782151131 +0000 +++ new/src/share/vm/runtime/vframeArray.cpp 2021-01-25 19:31:05.658149827 +0000 @@ -477,7 +477,7 @@ // Copy registers for callee-saved registers if (reg_map != NULL) { for(int i = 0; i < RegisterMap::reg_count; i++) { -#ifdef AMD64 +#if defined(AMD64) || defined(AARCH64) // The register map has one entry for every int (32-bit value), 
so // 64-bit physical registers have two entries in the map, one for // each half. Ignore the high halves of 64-bit registers, just like --- old/src/share/vm/runtime/vmStructs.cpp 2021-01-25 19:31:06.272156281 +0000 +++ new/src/share/vm/runtime/vmStructs.cpp 2021-01-25 19:31:06.139154883 +0000 @@ -107,6 +107,9 @@ #ifdef TARGET_ARCH_x86 # include "vmStructs_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vmStructs_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vmStructs_sparc.hpp" #endif @@ -122,6 +125,9 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "vmStructs_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "vmStructs_linux_aarch64.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "vmStructs_linux_sparc.hpp" #endif @@ -194,6 +200,8 @@ # include "adfiles/adGlobals_x86_32.hpp" #elif defined TARGET_ARCH_MODEL_x86_64 # include "adfiles/adGlobals_x86_64.hpp" +#elif defined TARGET_ARCH_MODEL_aarch64 +# include "adfiles/adGlobals_aarch64.hpp" #elif defined TARGET_ARCH_MODEL_sparc # include "adfiles/adGlobals_sparc.hpp" #elif defined TARGET_ARCH_MODEL_zero --- old/src/share/vm/runtime/vm_version.cpp 2021-01-25 19:31:06.802161851 +0000 +++ new/src/share/vm/runtime/vm_version.cpp 2021-01-25 19:31:06.672160485 +0000 @@ -29,6 +29,9 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "vm_version_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "vm_version_sparc.hpp" #endif @@ -194,6 +197,7 @@ #define CPU IA32_ONLY("x86") \ IA64_ONLY("ia64") \ AMD64_ONLY("amd64") \ + AARCH64_ONLY("aarch64") \ SPARC_ONLY("sparc") #endif // ZERO #endif --- old/src/share/vm/utilities/copy.hpp 2021-01-25 19:31:07.263166697 +0000 +++ new/src/share/vm/utilities/copy.hpp 2021-01-25 19:31:07.144165446 +0000 @@ -335,6 +335,9 @@ #ifdef TARGET_ARCH_x86 # include "copy_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "copy_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "copy_sparc.hpp" #endif --- old/src/share/vm/utilities/globalDefinitions.hpp 2021-01-25 19:31:07.724171542 +0000 +++ new/src/share/vm/utilities/globalDefinitions.hpp 2021-01-25 19:31:07.596170197 +0000 @@ -427,9 +427,17 @@ ProfileRTM = 0x0 // Use RTM with abort ratio calculation }; +// The maximum size of the code cache. Can be overridden by targets. +#define CODE_CACHE_SIZE_LIMIT (2*G) +// Allow targets to reduce the default size of the code cache. +#define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT + #ifdef TARGET_ARCH_x86 # include "globalDefinitions_x86.hpp" #endif +#ifdef TARGET_ARCH_aarch64 +# include "globalDefinitions_aarch64.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "globalDefinitions_sparc.hpp" #endif --- old/src/share/vm/utilities/macros.hpp 2021-01-25 19:31:08.208176629 +0000 +++ new/src/share/vm/utilities/macros.hpp 2021-01-25 19:31:08.085175336 +0000 @@ -354,6 +354,14 @@ #define NOT_AMD64(code) code #endif +#ifdef AARCH64 +#define AARCH64_ONLY(code) code +#define NOT_AARCH64(code) +#else +#define AARCH64_ONLY(code) +#define NOT_AARCH64(code) code +#endif + #ifdef SPARC #define SPARC_ONLY(code) code #define NOT_SPARC(code) --- old/test/compiler/codegen/IntRotateWithImmediate.java 2021-01-25 19:31:08.701181811 +0000 +++ new/test/compiler/codegen/IntRotateWithImmediate.java 2021-01-25 19:31:08.570180434 +0000 @@ -1,5 +1,6 @@ /* * Copyright 2015 SAP AG. All Rights Reserved. + * Copyright (c) 2016, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,6 +25,7 @@ /* * @test * @bug 8080190 + * @bug 8154537 * @key regression * @summary Test that the rotate distance used in the rotate instruction is properly masked with 0x1f * @run main/othervm -Xbatch -XX:-UseOnStackReplacement IntRotateWithImmediate @@ -33,7 +35,7 @@ public class IntRotateWithImmediate { // This is currently the same as Integer.rotateRight() - static int rotateRight(int i, int distance) { + static int rotateRight1(int i, int distance) { // On some architectures (i.e. x86_64 and ppc64) the following computation is // matched in the .ad file into a single MachNode which emmits a single rotate // machine instruction. It is important that the shift amount is masked to match @@ -43,17 +45,29 @@ return ((i >>> distance) | (i << -distance)); } - static int compute(int x) { - return rotateRight(x, 3); + static int rotateRight2(int i, int distance) { + return ((i >>> distance) | (i << (32-distance))); + } + + static int compute1(int x) { + return rotateRight1(x, 3); + } + + static int compute2(int x) { + return rotateRight2(x, 3); } public static void main(String args[]) { int val = 4096; - int firstResult = compute(val); + int firstResult = compute1(val); for (int i = 0; i < 100000; i++) { - int newResult = compute(val); + int newResult = compute1(val); + if (firstResult != newResult) { + throw new InternalError(firstResult + " != " + newResult); + } + newResult = compute2(val); if (firstResult != newResult) { throw new InternalError(firstResult + " != " + newResult); } --- old/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java 2021-01-25 19:31:09.213187192 +0000 +++ new/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java 2021-01-25 19:31:09.094185942 +0000 @@ -129,7 +129,7 @@ @Override protected boolean isIntrinsicSupported() { - return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) && (Platform.isX86() || Platform.isX64()); + return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) && (Platform.isX86() || Platform.isX64() || Platform.isAArch64()); } @Override @@ -145,7 +145,7 @@ @Override protected boolean isIntrinsicSupported() { - return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) && Platform.isX64(); + return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) && (Platform.isX64() || Platform.isAArch64()); } @Override --- old/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java 2021-01-25 19:31:09.712192437 +0000 +++ new/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java 2021-01-25 19:31:09.586191113 +0000 @@ -34,6 +34,7 @@ * -XX:CompileCommand=inline,java.math.BigInteger::multiply TestMultiplyToLen */ +import java.util.Arrays; import java.util.Random; import java.math.*; @@ -97,12 +98,36 @@ newsum = newsum.add(newres); if (!bytecompare(oldres,newres)) { + System.out.println(b1); + System.out.println(b2); + System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); + throw new Exception("Failed"); + } + } + + // Test carry propagation. Multiple carries during bignum + // multiplication are rare (especially when using 64-bit + // arithmetic) so we have to provoke them deliberately. 
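
The loop that follows fills both operands with 0xFF bytes. A standalone sketch of why all-ones limbs are the worst case for carry propagation in a schoolbook multiply (C++ using the GCC/Clang __int128 extension; illustrative only, not the jtreg test itself):

#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t a = ~0ULL, b = ~0ULL;                 // two all-ones 64-bit limbs
    unsigned __int128 p = (unsigned __int128)a * b;      // full 128-bit partial product
    uint64_t lo = (uint64_t)p, hi = (uint64_t)(p >> 64);
    // 0xFFFF...FF * 0xFFFF...FF = 0xFFFF...FE_0000...01: the high half is one
    // below the 64-bit maximum, so adding the low half of a neighboring
    // partial product or an incoming carry overflows it and pushes a carry
    // into the next limb on essentially every step of the accumulation.
    std::printf("hi=%016llx lo=%016llx\n",
                (unsigned long long)hi, (unsigned long long)lo);
    return 0;
}
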
+ for (int j = 4; j <= 396; j += 4) { + byte[] bytes = new byte[j]; + Arrays.fill(bytes, (byte)255); + b1 = new BigInteger(bytes); + b2 = new BigInteger(bytes); + + oldres = base_multiply(b1,b2); + newres = new_multiply(b1,b2); + + oldsum = oldsum.add(oldres); + newsum = newsum.add(newres); + + if (!bytecompare(oldres,newres)) { System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); System.out.println(b1); System.out.println(b2); throw new Exception("Failed"); } } + if (!bytecompare(oldsum,newsum)) { System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum)); throw new Exception("Failed"); --- old/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java 2021-01-25 19:31:10.196197524 +0000 +++ new/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java 2021-01-25 19:31:10.068196179 +0000 @@ -95,6 +95,19 @@ default: throw new Error("Unexpected option " + optionName); } + } else if (Platform.isAArch64()) { + switch (optionName) { + case SHAOptionsBase.USE_SHA_OPTION: + return SHAOptionsBase.SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION: + return SHAOptionsBase.SHA1_INSTRUCTION_IS_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION: + return SHAOptionsBase.SHA256_INSTRUCTION_IS_NOT_AVAILABLE; + case SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION: + return SHAOptionsBase.SHA512_INSTRUCTION_IS_NOT_AVAILABLE; + default: + throw new Error("Unexpected option " + optionName); + } } else { throw new Error("Support for CPUs other then X86 or SPARC is not " + "implemented."); --- old/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:10.696202779 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:10.557201318 +0000 @@ -34,7 +34,10 @@ */ public class TestUseSHA1IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION)).test(); } } --- old/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:11.149207541 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:11.030206290 +0000 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( --- old/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:11.600212281 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:11.483211051 +0000 @@ -35,7 +35,10 @@ */ public class TestUseSHA256IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new 
GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION)).test(); } } --- old/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:12.089217420 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:11.956216022 +0000 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( --- old/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:12.547222234 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnSupportedCPU.java 2021-01-25 19:31:12.416220857 +0000 @@ -35,7 +35,10 @@ */ public class TestUseSHA512IntrinsicsOptionOnSupportedCPU { public static void main(String args[]) throws Throwable { - new SHAOptionsBase(new GenericTestCaseForSupportedSparcCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION)).test(); + new SHAOptionsBase( + new GenericTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForSupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION)).test(); } } --- old/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:13.018227185 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java 2021-01-25 19:31:12.898225923 +0000 @@ -40,6 +40,8 @@ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( --- old/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnSupportedCPU.java 2021-01-25 19:31:13.455231778 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnSupportedCPU.java 2021-01-25 19:31:13.336230527 +0000 @@ -38,6 +38,8 @@ new GenericTestCaseForSupportedSparcCPU( SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForSupportedSparcCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForSupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION)).test(); } } --- old/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java 2021-01-25 19:31:13.968237170 +0000 +++ new/test/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java 2021-01-25 19:31:13.839235814 +0000 @@ -39,6 +39,8 @@ SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForUnsupportedSparcCPU( SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedX86CPU( SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU( --- old/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2021-01-25 19:31:14.434242068 +0000 +++ new/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java 2021-01-25 19:31:14.317240838 +0000 @@ -36,8 +36,9 @@ public GenericTestCaseForOtherCPU(String optionName) { // Execute 
the test case on any CPU except SPARC and X86 super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isAArch64, new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, Platform::isX86))))); + new OrPredicate(Platform::isX64, Platform::isX86)))))); } @Override --- old/test/compiler/stable/StableConfiguration.java 2021-01-25 19:31:14.898246944 +0000 +++ new/test/compiler/stable/StableConfiguration.java 2021-01-25 19:31:14.772245620 +0000 @@ -41,10 +41,30 @@ System.out.println("Server Compiler: " + get()); } + // The method 'get' below returns true if the method is server compiled + // and is used by the Stable tests to determine whether methods in + // general are being server compiled or not as the -XX:+FoldStableValues + // option is only applicable to -server. + // + // On aarch64 we DeOptimize when patching. This means that when the + // method is compiled as a result of -Xcomp it DeOptimizes immiediately. + // The result is that getMethodCompilationLevel returns 0. This means + // the method returns true based on java.vm.name. + // + // However when the tests are run with -XX:+TieredCompilation and + // -XX:TieredStopAtLevel=1 this fails because methods will always + // be client compiled. + // + // Solution is to add a simple method 'get1' which should never be + // DeOpted and use that to determine the compilation level instead. + static void get1() { + } + // ::get() is among immediately compiled methods. static boolean get() { try { - Method m = StableConfiguration.class.getDeclaredMethod("get"); + get1(); + Method m = StableConfiguration.class.getDeclaredMethod("get1"); int level = WB.getMethodCompilationLevel(m); if (level > 0) { return (level == 4); --- old/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2021-01-25 19:31:15.368251884 +0000 +++ new/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2021-01-25 19:31:15.238250518 +0000 @@ -59,24 +59,32 @@ }; public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, - null); + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, + null), + new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, + null)); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, + = new OrPredicate(new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, + null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, - null))); + null)))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, - null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, + = new OrPredicate( + new CPUSpecificPredicate("aarch64", new String[] { "sha512" }, + null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, + null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), - new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, - null))); + new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, + null)))); public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new 
OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionAARCH64.java 2021-01-25 19:31:15.719255574 +0000 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionAARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + public boolean isLP64() { + return true; + } + + public boolean isBigEndian() { + return false; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/aarch64/AARCH64ThreadContext.java 2021-01-25 19:31:16.178260398 +0000 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on aarch64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class AARCH64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/AARCH64. + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. 
However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int FP = 29; + public static final int LR = 30; + public static final int SP = 31; + public static final int PC = 32; + + public static final int NPRGREG = 33; + + private long[] data; + + public AARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64CFrame.java 2021-01-25 19:31:16.596264791 +0000 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +final public class LinuxAARCH64CFrame extends BasicCFrame { + public LinuxAARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxAARCH64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64ThreadContext.java 2021-01-25 19:31:17.051269573 +0000 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxAARCH64ThreadContext extends AARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxAARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64Thread.java 2021-01-25 19:31:17.486274146 +0000 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcAARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcAARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. 
+ this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcAARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcAARCH64ThreadContext context = new ProcAARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcAARCH64Thread)) { + return false; + } + + return (((ProcAARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadContext.java 2021-01-25 19:31:17.896278455 +0000 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadContext extends AARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcAARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadFactory.java 2021-01-25 19:31:18.345283174 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcAARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcAARCH64Thread(debugger, id); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64Thread.java 2021-01-25 19:31:18.812288082 +0000 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteAARCH64Thread extends RemoteThread { + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteAARCH64ThreadContext context = new RemoteAARCH64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadContext.java 2021-01-25 19:31:19.269292886 +0000 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadContext extends AARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadFactory.java 2021-01-25 19:31:19.753297973 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteAARCH64Thread(debugger, id); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64CurrentFrameGuess.java 2021-01-25 19:31:20.243303123 +0000 @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; + +/**

Should be able to be used on all aarch64 platforms we support + (Linux/aarch64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is an AARCH64ThreadContext; output is SP, FP, + and PC for an AARCH64Frame. Instantiation of the AARCH64Frame is + left to the caller, since we may need to subclass AARCH64Frame to + support signal handler frames on Unix platforms. + + Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at.
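A minimal sketch of the intended call pattern, mirroring the getCurrentFrameGuess code in LinuxAARCH64JavaThreadPDAccess later in this patch; the threadProxy and thread variables and the 128K scan range are illustrative assumptions, not part of the class itself:

    // Sketch only: recover SP/FP/PC for a thread's current frame and build the
    // matching AARCH64Frame. getPC() may return null when the values were taken
    // from the thread's last-Java-frame anchor rather than the register context.
    AARCH64ThreadContext context = (AARCH64ThreadContext) threadProxy.getContext();
    AARCH64CurrentFrameGuess guesser = new AARCH64CurrentFrameGuess(context, thread);
    if (!guesser.run(128 * 1024)) {   // assumed scan range
        return null;                  // no plausible Java frame found
    }
    return (guesser.getPC() == null)
        ? new AARCH64Frame(guesser.getSP(), guesser.getFP())
        : new AARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());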
*/ + +public class AARCH64CurrentFrameGuess { + private AARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") + != null; + + public AARCH64CurrentFrameGuess(AARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new AARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. 
Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from AARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from AARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it. + if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct AARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64Frame.java 2021-01-25 19:31:20.676307674 +0000 @@ -0,0 +1,563 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the aarch64 family of CPUs. */ + +public class AARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -8; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(29); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + 
INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private AARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + + // We cannot assume SP[-1] always contains a valid return PC (e.g. if + // the callee is a C/C++ compiled frame). If the PC is not known to + // Java then this.pc is null. + Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + if (VM.getVM().isJavaPCDbg(savedPC)) { + this.pc = savedPC; + } + + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + AARCH64Frame frame = new AARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof AARCH64Frame)) { + return false; + } + + AARCH64Frame other = (AARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + AARCH64RegisterMap map = (AARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new AARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + AARCH64JavaCallWrapper jcw = (AARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + AARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. 
The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new AARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(AARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated AARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. 
+ updateMapWithSavedLink(map, savedFPAddr); + } + + return new AARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + 
Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new AARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64JavaCallWrapper.java 2021-01-25 19:31:21.161312772 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class AARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public AARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64RegisterMap.java 2021-01-25 19:31:21.609317480 +0000 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class AARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public AARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected AARCH64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + AARCH64RegisterMap retval = new AARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_aarch64/LinuxAARCH64JavaThreadPDAccess.java 2021-01-25 19:31:22.047322084 +0000 @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.linux_aarch64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxAARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new AARCH64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new AARCH64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + AARCH64CurrentFrameGuess guesser = new AARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new AARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new AARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + 
+ public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(AARCH64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. + // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/make/linux/makefiles/aarch64.make 2021-01-25 19:31:22.472326551 +0000 @@ -0,0 +1,40 @@ +# +# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# If FDLIBM_CFLAGS is non-empty it holds CFLAGS needed to be passed to +# the compiler so as to be able to produce optimized objects +# without losing precision. +ifneq ($(FDLIBM_CFLAGS),) + OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/SPEED) $(FDLIBM_CFLAGS) + OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/SPEED) $(FDLIBM_CFLAGS) +else + OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) + OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) +endif +# Must also specify if CPU is little endian +CFLAGS += -DVM_LITTLE_ENDIAN + +# CFLAGS += -D_LP64=1 + +OPT_CFLAGS/compactingPermGenGen.o = -O1 --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/make/linux/platform_aarch64 2021-01-25 19:31:22.921331270 +0000 @@ -0,0 +1,15 @@ +os_family = linux + +arch = aarch64 + +arch_model = aarch64 + +os_arch = linux_aarch64 + +os_arch_model = linux_aarch64 + +lib_arch = aarch64 + +compiler = gcc + +sysdefs = -DLINUX -D_GNU_SOURCE -DAARCH64 --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/aarch64.ad 2021-01-25 19:31:23.361335895 +0000 @@ -0,0 +1,15677 @@ +// +// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2014, 2019, Red Hat Inc. +// All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. 
+// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// AArch64 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + +// We must define the 64 bit int registers in two 32 bit halves, the +// real lower register and a virtual upper half register. upper halves +// are used by the register allocator but are not actually supplied as +// operands to memory ops. +// +// follow the C1 compiler in making registers +// +// r0-r7,r10-r26 volatile (caller save) +// r27-r32 system (no save, no allocate) +// r8-r9 invisible to the allocator (so we can use them as scratch regs) +// +// as regards Java usage. 
we don't use any callee save registers +// because this makes it difficult to de-optimise a frame (see comment +// in x86 implementation of Deoptimization::unwind_callee_save_values) +// + +// General Registers + +reg_def R0 ( SOC, SOC, Op_RegI, 0, r0->as_VMReg() ); +reg_def R0_H ( SOC, SOC, Op_RegI, 0, r0->as_VMReg()->next() ); +reg_def R1 ( SOC, SOC, Op_RegI, 1, r1->as_VMReg() ); +reg_def R1_H ( SOC, SOC, Op_RegI, 1, r1->as_VMReg()->next() ); +reg_def R2 ( SOC, SOC, Op_RegI, 2, r2->as_VMReg() ); +reg_def R2_H ( SOC, SOC, Op_RegI, 2, r2->as_VMReg()->next() ); +reg_def R3 ( SOC, SOC, Op_RegI, 3, r3->as_VMReg() ); +reg_def R3_H ( SOC, SOC, Op_RegI, 3, r3->as_VMReg()->next() ); +reg_def R4 ( SOC, SOC, Op_RegI, 4, r4->as_VMReg() ); +reg_def R4_H ( SOC, SOC, Op_RegI, 4, r4->as_VMReg()->next() ); +reg_def R5 ( SOC, SOC, Op_RegI, 5, r5->as_VMReg() ); +reg_def R5_H ( SOC, SOC, Op_RegI, 5, r5->as_VMReg()->next() ); +reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() ); +reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); +reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); +reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); +reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); +reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); +reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); +reg_def R11_H ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next()); +reg_def R12 ( SOC, SOC, Op_RegI, 12, r12->as_VMReg() ); +reg_def R12_H ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next()); +reg_def R13 ( SOC, SOC, Op_RegI, 13, r13->as_VMReg() ); +reg_def R13_H ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next()); +reg_def R14 ( SOC, SOC, Op_RegI, 14, r14->as_VMReg() ); +reg_def R14_H ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next()); +reg_def R15 ( SOC, SOC, Op_RegI, 15, r15->as_VMReg() ); +reg_def R15_H ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next()); +reg_def R16 ( SOC, SOC, Op_RegI, 16, r16->as_VMReg() ); +reg_def R16_H ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next()); +reg_def R17 ( SOC, SOC, Op_RegI, 17, r17->as_VMReg() ); +reg_def R17_H ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next()); +reg_def R18 ( SOC, SOC, Op_RegI, 18, r18->as_VMReg() ); +reg_def R18_H ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next()); +reg_def R19 ( SOC, SOE, Op_RegI, 19, r19->as_VMReg() ); +reg_def R19_H ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next()); +reg_def R20 ( SOC, SOE, Op_RegI, 20, r20->as_VMReg() ); // caller esp +reg_def R20_H ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next()); +reg_def R21 ( SOC, SOE, Op_RegI, 21, r21->as_VMReg() ); +reg_def R21_H ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next()); +reg_def R22 ( SOC, SOE, Op_RegI, 22, r22->as_VMReg() ); +reg_def R22_H ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next()); +reg_def R23 ( SOC, SOE, Op_RegI, 23, r23->as_VMReg() ); +reg_def R23_H ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next()); +reg_def R24 ( SOC, SOE, Op_RegI, 24, r24->as_VMReg() ); +reg_def R24_H ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next()); +reg_def R25 ( SOC, SOE, Op_RegI, 25, r25->as_VMReg() ); +reg_def R25_H ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next()); +reg_def R26 ( SOC, SOE, Op_RegI, 26, r26->as_VMReg() ); +reg_def R26_H ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next()); +reg_def R27 ( NS, SOE, Op_RegI, 27, r27->as_VMReg() ); // heapbase +reg_def R27_H ( NS, SOE, Op_RegI, 27, r27->as_VMReg()->next()); +reg_def R28 ( NS, SOE, Op_RegI, 28, r28->as_VMReg() ); // thread +reg_def R28_H ( NS, SOE, Op_RegI, 28, r28->as_VMReg()->next()); +reg_def R29 ( NS, NS, 
Op_RegI, 29, r29->as_VMReg() ); // fp +reg_def R29_H ( NS, NS, Op_RegI, 29, r29->as_VMReg()->next()); +reg_def R30 ( NS, NS, Op_RegI, 30, r30->as_VMReg() ); // lr +reg_def R30_H ( NS, NS, Op_RegI, 30, r30->as_VMReg()->next()); +reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp +reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); + +// ---------------------------- +// Float/Double Registers +// ---------------------------- + +// Double Registers + +// The rules of ADL require that double registers be defined in pairs. +// Each pair must be two 32-bit values, but not necessarily a pair of +// single float registers. In each pair, ADLC-assigned register numbers +// must be adjacent, with the lower number even. Finally, when the +// CPU stores such a register pair to memory, the word associated with +// the lower ADLC-assigned number must be stored to the lower address. + +// AArch64 has 32 floating-point registers. Each can store a vector of +// single or double precision floating-point values up to 8 * 32 +// floats, 4 * 64 bit floats or 2 * 128 bit floats. We currently only +// use the first float or double element of the vector. + +// for Java use float registers v0-v15 are always save on call whereas +// the platform ABI treats v8-v15 as callee save). float registers +// v16-v31 are SOC as per the platform spec + + reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); + reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); + reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); + reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); + + reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); + reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); + reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); + reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); + + reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); + reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); + reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); + reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); + + reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); + reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); + reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); + reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); + + reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); + reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); + reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); + reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); + + reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); + reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); + reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); + reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); + + reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); + reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); + reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); + reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); + + reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); + reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); + reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); + reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); + + reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); + reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); + reg_def V8_J ( SOC, SOC, Op_RegF, 8, 
v8->as_VMReg()->next(2) ); + reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); + + reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); + reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); + reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); + reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); + + reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); + reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); + reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); + reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); + + reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); + reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); + reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); + reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); + + reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); + reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); + reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); + reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); + + reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); + reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); + reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); + reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); + + reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); + reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); + reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); + reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); + + reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); + reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); + reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); + reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); + + reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); + reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); + reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); + reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); + + reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); + reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); + reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); + reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); + + reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); + reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); + reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); + reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); + + reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); + reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); + reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); + reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); + + reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); + reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); + reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); + reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); + + reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); + reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); + reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); + reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); + + reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); + reg_def 
V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); + reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); + reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); + + reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); + reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); + reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); + reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); + + reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); + reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); + reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); + reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); + + reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); + reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); + reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); + reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); + + reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); + reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); + reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); + reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); + + reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); + reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); + reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); + reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); + + reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); + reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); + reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); + reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); + + reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); + reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); + reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); + reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); + + reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); + reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); + reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); + reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); + + reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); + reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); + reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); + reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); + +// ---------------------------- +// Special Registers +// ---------------------------- + +// the AArch64 CSPR status flag register is not directly acessible as +// instruction operand. the FPSR status flag register is a system +// register which can be written/read using MSR/MRS but again does not +// appear as an operand (a code identifying the FSPR occurs as an +// immediate value in the instruction). + +reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); + + +// Specify priority of register selection within phases of register +// allocation. Highest priority is first. A useful heuristic is to +// give registers a low priority when they are required by machine +// instructions, like EAX and EDX on I486, and choose no-save registers +// before save-on-call, & save-on-call before save-on-entry. Registers +// which participate in fixed calling sequences should come last. +// Registers which are used as pairs must fall on an even boundary. 
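To make the save-type convention above concrete, here is a small, self-contained C++ sketch (not part of the patch) that restates the (Java, C) save types listed in the reg_def entries for a few representative registers and prints what each classification means for the caller and the callee. The register/type pairs are taken from the table above; the subset chosen and the wording are otherwise illustrative.

#include <cstdio>

// Save types as described in the reg_def comments above.
enum SaveType { NS, SOC, SOE };

struct RegDef {
  const char* name;
  SaveType java_save;  // first reg_def column: save type for compiled Java
  SaveType c_save;     // second reg_def column: save type under the C convention
  const char* note;
};

static const char* describe(SaveType t) {
  switch (t) {
    case SOC: return "caller must save if live across a call";
    case SOE: return "callee saves before first use";
    default:  return "no save / not allocated";
  }
}

int main() {
  // A representative subset of the reg_def table above.
  const RegDef regs[] = {
    { "r0",  SOC, SOC, "argument/result" },
    { "r18", SOC, SOC, "ordinary volatile" },
    { "r19", SOC, SOE, "SOE for C, SOC for Java" },
    { "r26", SOC, SOE, "SOE for C, SOC for Java" },
    { "r27", NS,  SOE, "heapbase" },
    { "r28", NS,  SOE, "thread" },
    { "r31", NS,  NS,  "sp" },
  };
  for (const RegDef& r : regs) {
    std::printf("%-4s Java: %-40s C: %-40s (%s)\n",
                r.name, describe(r.java_save), describe(r.c_save), r.note);
  }
  // Because no general register is SOE for compiled Java code, C2 never
  // relies on a callee preserving a value across a Java call; that is the
  // property the earlier comment cites as simplifying deoptimization.
  return 0;
}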
+ +alloc_class chunk0( + // volatiles + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + + // arg registers + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R5, R5_H, + R6, R6_H, + R7, R7_H, + + // non-volatiles + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + + // non-allocatable registers + + R27, R27_H, // heapbase + R28, R28_H, // thread + R29, R29_H, // fp + R30, R30_H, // lr + R31, R31_H, // sp +); + +alloc_class chunk1( + + // no save + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K, + + // arg registers + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + + // non-volatiles + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, +); + +alloc_class chunk2(RFLAGS); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. +// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) +// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) +// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// Class for all 32 bit integer registers -- excludes SP which will +// never be used as an integer register +reg_class any_reg32( + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30 +); + +// Singleton class for R0 int register +reg_class int_r0_reg(R0); + +// Singleton class for R2 int register +reg_class int_r2_reg(R2); + +// Singleton class for R3 int register +reg_class int_r3_reg(R3); + +// Singleton class for R4 int register +reg_class int_r4_reg(R4); + +// Class for all long integer registers (including RSP) +reg_class any_reg( + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R5, R5_H, + R6, R6_H, + R7, R7_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + R27, R27_H, + R28, R28_H, + R29, R29_H, + R30, R30_H, + R31, R31_H +); + +// Class for all non-special integer registers +reg_class no_special_reg32( + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R10, + R11, + R12, // rmethod + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26 + /* R27, */ // heapbase + /* R28, */ // thread + /* R29, */ // fp + /* R30, */ // lr + /* R31 */ // sp +); + +// Class for all non-special long integer registers 
+reg_class no_special_reg( + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R5, R5_H, + R6, R6_H, + R7, R7_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, // rmethod + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + /* R27, R27_H, */ // heapbase + /* R28, R28_H, */ // thread + /* R29, R29_H, */ // fp + /* R30, R30_H, */ // lr + /* R31, R31_H */ // sp +); + +// Class for 64 bit register r0 +reg_class r0_reg( + R0, R0_H +); + +// Class for 64 bit register r1 +reg_class r1_reg( + R1, R1_H +); + +// Class for 64 bit register r2 +reg_class r2_reg( + R2, R2_H +); + +// Class for 64 bit register r3 +reg_class r3_reg( + R3, R3_H +); + +// Class for 64 bit register r4 +reg_class r4_reg( + R4, R4_H +); + +// Class for 64 bit register r5 +reg_class r5_reg( + R5, R5_H +); + +// Class for 64 bit register r10 +reg_class r10_reg( + R10, R10_H +); + +// Class for 64 bit register r11 +reg_class r11_reg( + R11, R11_H +); + +// Class for method register +reg_class method_reg( + R12, R12_H +); + +// Class for heapbase register +reg_class heapbase_reg( + R27, R27_H +); + +// Class for thread register +reg_class thread_reg( + R28, R28_H +); + +// Class for frame pointer register +reg_class fp_reg( + R29, R29_H +); + +// Class for link register +reg_class lr_reg( + R30, R30_H +); + +// Class for long sp register +reg_class sp_reg( + R31, R31_H +); + +// Class for all pointer registers +reg_class ptr_reg( + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R5, R5_H, + R6, R6_H, + R7, R7_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + R27, R27_H, + R28, R28_H, + R29, R29_H, + R30, R30_H, + R31, R31_H +); + +// Class for all non_special pointer registers +reg_class no_special_ptr_reg( + R0, R0_H, + R1, R1_H, + R2, R2_H, + R3, R3_H, + R4, R4_H, + R5, R5_H, + R6, R6_H, + R7, R7_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H, + R16, R16_H, + R17, R17_H, + R18, R18_H, + R19, R19_H, + R20, R20_H, + R21, R21_H, + R22, R22_H, + R23, R23_H, + R24, R24_H, + R25, R25_H, + R26, R26_H, + /* R27, R27_H, */ // heapbase + /* R28, R28_H, */ // thread + /* R29, R29_H, */ // fp + /* R30, R30_H, */ // lr + /* R31, R31_H */ // sp +); + +// Class for all float registers +reg_class float_reg( + V0, + V1, + V2, + V3, + V4, + V5, + V6, + V7, + V8, + V9, + V10, + V11, + V12, + V13, + V14, + V15, + V16, + V17, + V18, + V19, + V20, + V21, + V22, + V23, + V24, + V25, + V26, + V27, + V28, + V29, + V30, + V31 +); + +// Double precision float registers have virtual `high halves' that +// are needed by the allocator. 
+// Class for all double registers +reg_class double_reg( + V0, V0_H, + V1, V1_H, + V2, V2_H, + V3, V3_H, + V4, V4_H, + V5, V5_H, + V6, V6_H, + V7, V7_H, + V8, V8_H, + V9, V9_H, + V10, V10_H, + V11, V11_H, + V12, V12_H, + V13, V13_H, + V14, V14_H, + V15, V15_H, + V16, V16_H, + V17, V17_H, + V18, V18_H, + V19, V19_H, + V20, V20_H, + V21, V21_H, + V22, V22_H, + V23, V23_H, + V24, V24_H, + V25, V25_H, + V26, V26_H, + V27, V27_H, + V28, V28_H, + V29, V29_H, + V30, V30_H, + V31, V31_H +); + +// Class for all 64bit vector registers +reg_class vectord_reg( + V0, V0_H, + V1, V1_H, + V2, V2_H, + V3, V3_H, + V4, V4_H, + V5, V5_H, + V6, V6_H, + V7, V7_H, + V8, V8_H, + V9, V9_H, + V10, V10_H, + V11, V11_H, + V12, V12_H, + V13, V13_H, + V14, V14_H, + V15, V15_H, + V16, V16_H, + V17, V17_H, + V18, V18_H, + V19, V19_H, + V20, V20_H, + V21, V21_H, + V22, V22_H, + V23, V23_H, + V24, V24_H, + V25, V25_H, + V26, V26_H, + V27, V27_H, + V28, V28_H, + V29, V29_H, + V30, V30_H, + V31, V31_H +); + +// Class for all 128bit vector registers +reg_class vectorx_reg( + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K +); + +// Class for 128 bit register v0 +reg_class v0_reg( + V0, V0_H +); + +// Class for 128 bit register v1 +reg_class v1_reg( + V1, V1_H +); + +// Class for 128 bit register v2 +reg_class v2_reg( + V2, V2_H +); + +// Class for 128 bit register v3 +reg_class v3_reg( + V3, V3_H +); + +// Singleton class for condition codes +reg_class int_flags(RFLAGS); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// + +// we follow the ppc-aix port in using a simple cost model which ranks +// register operations as cheap, memory ops as more expensive and +// branches as most expensive. the first two have a low as well as a +// normal cost. huge cost appears to be a way of saying don't do +// something + +definitions %{ + // The default cost (of a register move instruction). 
+ int_def INSN_COST ( 100, 100); + int_def BRANCH_COST ( 200, 2 * INSN_COST); + int_def CALL_COST ( 200, 2 * INSN_COST); + int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); +%} + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + return MacroAssembler::far_branch_size(); + } + + static uint size_deopt_handler() { + // count one adr and one far branch instruction + // return 4 * NativeInstruction::instruction_size; + return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); + } +}; + + bool is_CAS(int opcode); + + // predicates controlling emit of ldr/ldar and associated dmb + + bool unnecessary_acquire(const Node *barrier); + bool needs_acquiring_load(const Node *load); + + // predicates controlling emit of str/stlr and associated dmbs + + bool unnecessary_release(const Node *barrier); + bool unnecessary_volatile(const Node *barrier); + bool needs_releasing_store(const Node *store); + + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_exclusive(const Node *load); + + // predicate controlling translation of StoreCM + bool unnecessary_storestore(const Node *storecm); +%} + +source %{ + + // Optimizaton of volatile gets and puts + // ------------------------------------- + // + // AArch64 has ldar and stlr instructions which we can safely + // use to implement volatile reads and writes. For a volatile read + // we simply need + // + // ldar + // + // and for a volatile write we need + // + // stlr + // + // Alternatively, we can implement them by pairing a normal + // load/store with a memory barrier. For a volatile read we need + // + // ldr + // dmb ishld + // + // for a volatile write + // + // dmb ish + // str + // dmb ish + // + // We can also use ldaxr and stlxr to implement compare and swap CAS + // sequences. These are normally translated to an instruction + // sequence like the following + // + // dmb ish + // retry: + // ldxr rval raddr + // cmp rval rold + // b.ne done + // stlxr rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // dmb ishld + // + // Note that the exclusive store is already using an stlxr + // instruction. That is required to ensure visibility to other + // threads of the exclusive write (assuming it succeeds) before that + // of any subsequent writes. 
+ // + // The following instruction sequence is an improvement on the above + // + // retry: + // ldaxr rval raddr + // cmp rval rold + // b.ne done + // stlxr rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // + // We don't need the leading dmb ish since the stlxr guarantees + // visibility of prior writes in the case that the swap is + // successful. Crucially we don't have to worry about the case where + // the swap is not successful since no valid program should be + // relying on visibility of prior changes by the attempting thread + // in the case where the CAS fails. + // + // Similarly, we don't need the trailing dmb ishld if we substitute + // an ldaxr instruction since that will provide all the guarantees we + // require regarding observation of changes made by other threads + // before any change to the CAS address observed by the load. + // + // In order to generate the desired instruction sequence we need to + // be able to identify specific 'signature' ideal graph node + // sequences which i) occur as a translation of a volatile reads or + // writes or CAS operations and ii) do not occur through any other + // translation or graph transformation. We can then provide + // alternative aldc matching rules which translate these node + // sequences to the desired machine code sequences. Selection of the + // alternative rules can be implemented by predicates which identify + // the relevant node sequences. + // + // The ideal graph generator translates a volatile read to the node + // sequence + // + // LoadX[mo_acquire] + // MemBarAcquire + // + // As a special case when using the compressed oops optimization we + // may also see this variant + // + // LoadN[mo_acquire] + // DecodeN + // MemBarAcquire + // + // A volatile write is translated to the node sequence + // + // MemBarRelease + // StoreX[mo_release] {CardMark}-optional + // MemBarVolatile + // + // n.b. the above node patterns are generated with a strict + // 'signature' configuration of input and output dependencies (see + // the predicates below for exact details). The card mark may be as + // simple as a few extra nodes or, in a few GC configurations, may + // include more complex control flow between the leading and + // trailing memory barriers. However, whatever the card mark + // configuration these signatures are unique to translated volatile + // reads/stores -- they will not appear as a result of any other + // bytecode translation or inlining nor as a consequence of + // optimizing transforms. + // + // We also want to catch inlined unsafe volatile gets and puts and + // be able to implement them using either ldar/stlr or some + // combination of ldr/stlr and dmb instructions. + // + // Inlined unsafe volatiles puts manifest as a minor variant of the + // normal volatile put node sequence containing an extra cpuorder + // membar + // + // MemBarRelease + // MemBarCPUOrder + // StoreX[mo_release] {CardMark}-optional + // MemBarVolatile + // + // n.b. as an aside, the cpuorder membar is not itself subject to + // matching and translation by adlc rules. However, the rule + // predicates need to detect its presence in order to correctly + // select the desired adlc rules. + // + // Inlined unsafe volatile gets manifest as a somewhat different + // node sequence to a normal volatile get + // + // MemBarCPUOrder + // || \\ + // MemBarAcquire LoadX[mo_acquire] + // || + // MemBarCPUOrder + // + // In this case the acquire membar does not directly depend on the + // load. 
However, we can be sure that the load is generated from an + // inlined unsafe volatile get if we see it dependent on this unique + // sequence of membar nodes. Similarly, given an acquire membar we + // can know that it was added because of an inlined unsafe volatile + // get if it is fed and feeds a cpuorder membar and if its feed + // membar also feeds an acquiring load. + // + // Finally an inlined (Unsafe) CAS operation is translated to the + // following ideal graph + // + // MemBarRelease + // MemBarCPUOrder + // CompareAndSwapX {CardMark}-optional + // MemBarCPUOrder + // MemBarAcquire + // + // So, where we can identify these volatile read and write + // signatures we can choose to plant either of the above two code + // sequences. For a volatile read we can simply plant a normal + // ldr and translate the MemBarAcquire to a dmb. However, we can + // also choose to inhibit translation of the MemBarAcquire and + // inhibit planting of the ldr, instead planting an ldar. + // + // When we recognise a volatile store signature we can choose to + // plant at a dmb ish as a translation for the MemBarRelease, a + // normal str and then a dmb ish for the MemBarVolatile. + // Alternatively, we can inhibit translation of the MemBarRelease + // and MemBarVolatile and instead plant a simple stlr + // instruction. + // + // when we recognise a CAS signature we can choose to plant a dmb + // ish as a translation for the MemBarRelease, the conventional + // macro-instruction sequence for the CompareAndSwap node (which + // uses ldxr) and then a dmb ishld for the MemBarAcquire. + // Alternatively, we can elide generation of the dmb instructions + // and plant the alternative CompareAndSwap macro-instruction + // sequence (which uses ldaxr). + // + // Of course, the above only applies when we see these signature + // configurations. We still want to plant dmb instructions in any + // other cases where we may see a MemBarAcquire, MemBarRelease or + // MemBarVolatile. For example, at the end of a constructor which + // writes final/volatile fields we will see a MemBarRelease + // instruction and this needs a 'dmb ish' lest we risk the + // constructed object being visible without making the + // final/volatile field writes visible. + // + // n.b. the translation rules below which rely on detection of the + // volatile signatures and insert ldar or stlr are failsafe. + // If we see anything other than the signature configurations we + // always just translate the loads and stores to ldr and str + // and translate acquire, release and volatile membars to the + // relevant dmb instructions. + // + + // is_CAS(int opcode) + // + // return true if opcode is one of the possible CompareAndSwapX + // values otherwise false. 
+ + bool is_CAS(int opcode) + { + switch(opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: + case Op_GetAndSetI: + case Op_GetAndSetL: + case Op_GetAndSetP: + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: + return true; + default: + return false; + } + } + +// predicates controlling emit of ldr/ldar and associated dmb + +bool unnecessary_acquire(const Node *barrier) +{ + assert(barrier->is_MemBar(), "expecting a membar"); + + if (UseBarriersForVolatile) { + // we need to plant a dmb + return false; + } + + MemBarNode* mb = barrier->as_MemBar(); + + if (mb->trailing_load()) { + return true; + } + + if (mb->trailing_load_store()) { + Node* load_store = mb->in(MemBarNode::Precedent); + assert(load_store->is_LoadStore(), "unexpected graph shape"); + return is_CAS(load_store->Opcode()); + } + + return false; +} + +bool needs_acquiring_load(const Node *n) +{ + assert(n->is_Load(), "expecting a load"); + if (UseBarriersForVolatile) { + // we use a normal load and a dmb + return false; + } + + LoadNode *ld = n->as_Load(); + + return ld->is_acquire(); +} + +bool unnecessary_release(const Node *n) +{ + assert((n->is_MemBar() && + n->Opcode() == Op_MemBarRelease), + "expecting a release membar"); + + if (UseBarriersForVolatile) { + // we need to plant a dmb + return false; + } + + MemBarNode *barrier = n->as_MemBar(); + + if (!barrier->leading()) { + return false; + } else { + Node* trailing = barrier->trailing_membar(); + MemBarNode* trailing_mb = trailing->as_MemBar(); + assert(trailing_mb->trailing(), "Not a trailing membar?"); + assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); + + Node* mem = trailing_mb->in(MemBarNode::Precedent); + if (mem->is_Store()) { + assert(mem->as_Store()->is_release(), ""); + assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); + return true; + } else { + assert(mem->is_LoadStore(), ""); + assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); + return is_CAS(mem->Opcode()); + } + } + + return false; +} + +bool unnecessary_volatile(const Node *n) +{ + // assert n->is_MemBar(); + if (UseBarriersForVolatile) { + // we need to plant a dmb + return false; + } + + MemBarNode *mbvol = n->as_MemBar(); + + bool release = mbvol->trailing_store(); + assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); +#ifdef ASSERT + if (release) { + Node* leading = mbvol->leading_membar(); + assert(leading->Opcode() == Op_MemBarRelease, ""); + assert(leading->as_MemBar()->leading_store(), ""); + assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); + } +#endif + + return release; +} + +// predicates controlling emit of str/stlr and associated dmbs + +bool needs_releasing_store(const Node *n) +{ + // assert n->is_Store(); + if (UseBarriersForVolatile) { + // we use a normal store and dmb combination + return false; + } + + StoreNode *st = n->as_Store(); + + return st->trailing_membar() != NULL; +} + +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false + +bool needs_acquiring_load_exclusive(const Node *n) +{ + assert(is_CAS(n->Opcode()), "expecting a compare and swap"); + if (UseBarriersForVolatile) { + return false; + } + + LoadStoreNode* ldst = n->as_LoadStore(); + assert(ldst->trailing_membar() != NULL, "expected trailing membar"); + + // so we can just return true here + return true; +} + +// 
predicate controlling translation of StoreCM
+//
+// returns true if a StoreStore must precede the card write otherwise
+// false
+
+bool unnecessary_storestore(const Node *storecm)
+{
+ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
+
+ // we need to generate a dmb ishst between an object put and the
+ // associated card mark when we are using CMS without conditional
+ // card marking
+
+ if (UseConcMarkSweepGC && !UseCondCardMark) {
+ return false;
+ }
+
+ // a storestore is unnecessary in all other cases
+
+ return true;
+}
+
+
+#define __ _masm.
+
+// advance declarations for helper functions to convert register
+// indices to register objects
+
+// the ad file has to provide implementations of certain methods
+// expected by the generic code
+//
+// REQUIRED FUNCTIONALITY
+
+//=============================================================================
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+// from the start of the call to the point where the return address
+// will point.
+
+int MachCallStaticJavaNode::ret_addr_offset()
+{
+ // call should be a simple bl
+ // unless this is a method handle invoke in which case it is
+ // mov(rfp, sp), bl, mov(sp, rfp)
+ int off = 4;
+ if (_method_handle_invoke) {
+ off += 4;
+ }
+ return off;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset()
+{
+ return 16; // movz, movk, movk, bl
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+ // for generated stubs the call will be
+ // bl(addr)
+ // for real runtime callouts it will be six instructions
+ // see aarch64_enc_java_to_runtime
+ // adr(rscratch2, retaddr)
+ // lea(rscratch1, RuntimeAddress(addr)
+ // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
+ // blr(rscratch1)
+ CodeBlob *cb = CodeCache::find_blob(_entry_point);
+ if (cb) {
+ return MacroAssembler::far_branch_size();
+ } else {
+ return 6 * NativeInstruction::instruction_size;
+ }
+}
+
+// Indicate if the safepoint node needs the polling page as an input
+
+// the shared code plants the oop data at the start of the generated
+// code for the safepoint node and that needs to be at the load
+// instruction itself. so we cannot plant a mov of the safepoint poll
+// address followed by a load. setting this to true means the mov is
+// scheduled as a prior instruction. that's better for scheduling
+// anyway.
+ +bool SafePointNode::needs_polling_address_input() +{ + return true; +} + +//============================================================================= + +#ifndef PRODUCT +void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("BREAKPOINT"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(0); +} + +uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= + +#ifndef PRODUCT + void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { + st->print("nop \t# %d bytes pad for loops and calls", _count); + } +#endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { + MacroAssembler _masm(&cbuf); + for (int i = 0; i < _count; i++) { + __ nop(); + } + } + + uint MachNopNode::size(PhaseRegAlloc*) const { + return _count * NativeInstruction::instruction_size; + } + +//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + // Empty encoding +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + return 0; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + st->print("-- \t// MachConstantBaseNode (empty encoding)"); +} +#endif + +#ifndef PRODUCT +void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + Compile* C = ra_->C; + + int framesize = C->frame_slots() << LogBytesPerInt; + + if (C->need_stack_bang(framesize)) + st->print("# stack bang size=%d\n\t", framesize); + + if (framesize == 0) { + // Is this even possible? + st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize)); + } else if (framesize < ((1 << 9) + 2 * wordSize)) { + st->print("sub sp, sp, #%d\n\t", framesize); + st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize); + } else { + st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize)); + st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize); + st->print("sub sp, sp, rscratch1"); + } +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + // n.b. frame size includes space for return pc and rfp + long framesize = ((long)C->frame_slots()) << LogBytesPerInt; + assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment"); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + if (C->need_stack_bang(framesize)) + __ generate_stack_overflow_check(framesize); + + __ build_frame(framesize); + + if (VerifyStackAtCalls) { + Unimplemented(); + } + + C->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
+ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + +uint MachPrologNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it + // the hard way +} + +int MachPrologNode::reloc() const +{ + return 0; +} + +//============================================================================= + +#ifndef PRODUCT +void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + Compile* C = ra_->C; + int framesize = C->frame_slots() << LogBytesPerInt; + + st->print("# pop frame %d\n\t",framesize); + + if (framesize == 0) { + st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); + } else if (framesize < ((1 << 9) + 2 * wordSize)) { + st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize); + st->print("add sp, sp, #%d\n\t", framesize); + } else { + st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize); + st->print("add sp, sp, rscratch1\n\t"); + st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); + } + + if (do_polling() && C->is_method_compilation()) { + st->print("# touch polling page\n\t"); + st->print("mov rscratch1, #" INTPTR_FORMAT "\n\t", p2i(os::get_polling_page())); + st->print("ldr zr, [rscratch1]"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_slots() << LogBytesPerInt; + + __ remove_frame(framesize); + + if (do_polling() && C->is_method_compilation()) { + __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + // Variable size. Determine dynamically. + return MachNode::size(ra_); +} + +int MachEpilogNode::reloc() const { + // Return number of relocatable values contained in this instruction. + return 1; // 1 for polling page. +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +// This method seems to be obsolete. It is declared in machnode.hpp +// and defined in all *.ad files, but it is never called. Should we +// get rid of it? +int MachEpilogNode::safepoint_offset() const { + assert(do_polling(), "no return for this epilog node"); + return 4; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. +enum RC { rc_bad, rc_int, rc_float, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + + if (reg == OptoReg::Bad) { + return rc_bad; + } + + // we have 30 int registers * 2 halves + // (rscratch1 and rscratch2 are omitted) + + if (reg < 60) { + return rc_int; + } + + // we have 32 float register * 2 halves + if (reg < 60 + 128) { + return rc_float; + } + + // Between float regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { + Compile* C = ra_->C; + + // Get registers to move. 
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); + OptoReg::Name src_lo = ra_->get_reg_first(in(1)); + OptoReg::Name dst_hi = ra_->get_reg_second(this); + OptoReg::Name dst_lo = ra_->get_reg_first(this); + + enum RC src_hi_rc = rc_class(src_hi); + enum RC src_lo_rc = rc_class(src_lo); + enum RC dst_hi_rc = rc_class(dst_hi); + enum RC dst_lo_rc = rc_class(dst_lo); + + assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + + if (src_hi != OptoReg::Bad) { + assert((src_lo&1)==0 && src_lo+1==src_hi && + (dst_lo&1)==0 && dst_lo+1==dst_hi, + "expected aligned-adjacent pairs"); + } + + if (src_lo == dst_lo && src_hi == dst_hi) { + return 0; // Self copy, no move. + } + + bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && + (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); + if (cbuf) { + MacroAssembler _masm(cbuf); + assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack->stack + assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset"); + if (ireg == Op_VecD) { + __ unspill(rscratch1, true, src_offset); + __ spill(rscratch1, true, dst_offset); + } else { + __ spill_copy128(src_offset, dst_offset); + } + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ T8B : __ T16B, + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(dst_lo)); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? 
__ D : __ Q, + ra_->reg2offset(src_lo)); + } else { + ShouldNotReachHere(); + } + } + } else if (cbuf) { + MacroAssembler _masm(cbuf); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy + if (is64) { + __ mov(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + MacroAssembler _masm(cbuf); + __ movw(as_Register(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } + } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy + if (is64) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); + } + } else { // gpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); + } + break; + case rc_float: + if (dst_lo_rc == rc_int) { // fpr --> gpr copy + if (is64) { + __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } + } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy + if (cbuf) { + __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else { + __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } + } else { // fpr --> stack spill + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), + is64 ? __ D : __ S, dst_offset); + } + break; + case rc_stack: + if (dst_lo_rc == rc_int) { // stack --> gpr load + __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); + } else if (dst_lo_rc == rc_float) { // stack --> fpr load + __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), + is64 ? __ D : __ S, src_offset); + } else { // stack --> stack copy + assert(dst_lo_rc == rc_stack, "spill to bad register class"); + __ unspill(rscratch1, is64, src_offset); + __ spill(rscratch1, is64, dst_offset); + } + break; + default: + assert(false, "bad rc_class for spill"); + ShouldNotReachHere(); + } + } + + if (st) { + st->print("spill "); + if (src_lo_rc == rc_stack) { + st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo)); + } else { + st->print("%s -> ", Matcher::regName[src_lo]); + } + if (dst_lo_rc == rc_stack) { + st->print("[sp, #%d]", ra_->reg2offset(dst_lo)); + } else { + st->print("%s", Matcher::regName[dst_lo]); + } + if (bottom_type()->isa_vect() != NULL) { + st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); + } else { + st->print("\t# spill size = %d", is64 ? 
64:32); + } + } + + return 0; + +} + +#ifndef PRODUCT +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + if (!ra_) + st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); + else + implementation(NULL, ra_, false, st); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation(&cbuf, ra_, false, NULL); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("add %s, rsp, #%d]\t# box lock", + Matcher::regName[reg], offset); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (Assembler::operand_valid_for_add_sub_immediate(offset)) { + __ add(as_Register(reg), sp, offset); + } else { + ShouldNotReachHere(); + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). + return 4; +} + +//============================================================================= + +#ifndef PRODUCT +void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1"); + } + } else { + st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + } + st->print_cr("\tcmp r0, rscratch1\t # Inline cache check"); + st->print_cr("\tbne, SharedRuntime::_ic_miss_stub"); +} +#endif + +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // This is the unverified entry point. + MacroAssembler _masm(&cbuf); + + __ cmp_klass(j_rarg0, rscratch2, rscratch1); + Label skip; + // TODO + // can we avoid this skip and still use a reloc? + __ br(Assembler::EQ, skip); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ bind(skip); +} + +uint MachUEPNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); +} + +// REQUIRED EMIT CODE + +//============================================================================= + +// Emit exception handler code. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) +{ + // mov rscratch1 #exception_blob_entry_point + // br rscratch1 + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + int offset = __ offset(); + __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) +{ + // Note that the code buffer's insts_mark is always relative to insts. 
+ // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + int offset = __ offset(); + + __ adr(lr, __ pc()); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// REQUIRED MATCHER CODE + +//============================================================================= + +const bool Matcher::match_rule_supported(int opcode) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_StrEquals and other intrinsics + if (!has_match_rule(opcode)) { + return false; + } + + return true; // Per default match rules are supported. +} + +int Matcher::regnum_to_fpu_offset(int regnum) +{ + Unimplemented(); + return 0; +} + +// Is this branch offset short enough that a short branch can be used? +// +// NOTE: If the platform does not provide any short branch variants, then +// this method should return false for offset 0. +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + + return (-32768 <= offset && offset < 32768); +} + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + // Probably always true, even if a temp register is required. + return true; +} + +// true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Vector width in bytes. +const int Matcher::vector_width_in_bytes(BasicType bt) { + int size = MIN2(16,(int)MaxVectorSize); + // Minimum 2 values in vector + if (size < 2*type2aelembytes(bt)) size = 0; + // But never < 4 + if (size < 4) size = 0; + return size; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} +const int Matcher::min_vector_size(const BasicType bt) { +// For the moment limit the vector size to 8 bytes + int size = 8 / type2aelembytes(bt); + if (size < 2) size = 2; + return size; +} + +// Vector ideal reg. +const uint Matcher::vector_ideal_reg(int len) { + switch(len) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; +} + +const uint Matcher::vector_shift_count_ideal_reg(int size) { + switch(size) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; +} + +// AES support not yet implemented +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +// x86 supports misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return !AlignVector; // can be changed by flag +} + +// false => size gets scaled to BytesPerLong, ok. +const bool Matcher::init_array_count_is_in_bytes = false; + +// Threshold size for cleararray. +const int Matcher::init_array_short_size = 4 * BytesPerLong; + +// Use conditional move (CMOVL) +const int Matcher::long_cmove_cost() { + // long cmoves are no more expensive than int cmoves + return 0; +} + +const int Matcher::float_cmove_cost() { + // float cmoves are no more expensive than int cmoves + return 0; +} + +// Does the CPU require late expand (see block.cpp for description of late expand)? 
+const bool Matcher::require_postalloc_expand = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? True for Intel but false for most RISCs +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +// This affects two different things: +// - how Decode nodes are matched +// - how ImplicitNullCheck opportunities are recognized +// If true, the matcher will try to remove all Decodes and match them +// (as operands) into nodes. NullChecks are not prepared to deal with +// Decodes by final_graph_reshaping(). +// If false, final_graph_reshaping() forces the decode behind the Cmp +// for a NullCheck. The matcher matches the Decode node into a register. +// Implicit_null_check optimization moves the Decode along with the +// memory operation back up before the NullCheck. +bool Matcher::narrow_oop_use_complex_address() { + return Universe::narrow_oop_shift() == 0; +} + +bool Matcher::narrow_klass_use_complex_address() { +// TODO +// decide whether we need to set this to true + return false; +} + +// Is it better to copy float constants, or load them directly from +// memory? Intel can load a float constant from a direct address, +// requiring no extra registers. Most RISCs will have to materialize +// an address into a register first, so they would do better to copy +// the constant from stack. +const bool Matcher::rematerialize_float_constants = false; + +// If CPU can load and store mis-aligned doubles directly then no +// fixup is needed. Else we split the double into 2 integer pieces +// and move it piece-by-piece. Only happens when passing doubles into +// C code as the Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = true; + +// No-op on amd64 +void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { + Unimplemented(); +} + +// Advertise here if the CPU requires explicit rounding operations to +// implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; + +// Are floats converted to double when stored to stack during +// deoptimization? +bool Matcher::float_in_double() { return true; } + +// Do ints take an entire long register or just half? +// The relevant question is how the int is callee-saved: +// the whole long is written but de-opt'ing will have to extract +// the relevant 32 bits. +const bool Matcher::int_in_long = true; + +// Return whether or not this register is ever used as an argument. +// This function is used on startup to build the trampoline stubs in +// generateOptoStub. Registers not mentioned will be killed by the VM +// call in the trampoline, and arguments in those registers not be +// available to the callee. 
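+// As a rough illustration (the register names here are taken from this
+// port's register definitions, not from this function): the Java calling
+// convention passes the first eight integer/pointer arguments in r0..r7
+// and the first eight floating-point arguments in v0..v7, which is why
+// exactly those registers (and their _H upper halves) appear in the list
+// below.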
+bool Matcher::can_be_java_arg(int reg) +{ + return + reg == R0_num || reg == R0_H_num || + reg == R1_num || reg == R1_H_num || + reg == R2_num || reg == R2_H_num || + reg == R3_num || reg == R3_H_num || + reg == R4_num || reg == R4_H_num || + reg == R5_num || reg == R5_H_num || + reg == R6_num || reg == R6_H_num || + reg == R7_num || reg == R7_H_num || + reg == V0_num || reg == V0_H_num || + reg == V1_num || reg == V1_H_num || + reg == V2_num || reg == V2_H_num || + reg == V3_num || reg == V3_H_num || + reg == V4_num || reg == V4_H_num || + reg == V5_num || reg == V5_H_num || + reg == V6_num || reg == V6_H_num || + reg == V7_num || reg == V7_H_num; +} + +bool Matcher::is_spillable_arg(int reg) +{ + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} + +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI. +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL. +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODL projection of divmodL. +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + + +#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \ + MacroAssembler _masm(&cbuf); \ + { \ + guarantee(INDEX == -1, "mode not permitted for volatile"); \ + guarantee(DISP == 0, "mode not permitted for volatile"); \ + guarantee(SCALE == 0, "mode not permitted for volatile"); \ + __ INSN(REG, as_Register(BASE)); \ + } + +typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); +typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); +typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + MacroAssembler::SIMD_RegVariant T, const Address &adr); + + // Used for all non-volatile memory accesses. The use of + // $mem->opcode() to discover whether this pattern uses sign-extended + // offsets is something of a kludge. + static void loadStore(MacroAssembler masm, mem_insn insn, + Register reg, int opcode, + Register base, int index, int size, int disp) + { + Address::extend scale; + + // Hooboy, this is fugly. We need a way to communicate to the + // encoder that the index needs to be sign extended, so we have to + // enumerate all the cases. 
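+    // As a rough illustration (register numbers and element size are
+    // assumed purely for the example, not taken from the pattern): with
+    // an int index that has been converted to long and 8-byte elements
+    // (size == 3), the sxtw cases below produce an address form like
+    // [x1, w2, sxtw #3], while the default lsl case produces
+    // [x1, x2, lsl #3].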
+ switch (opcode) { + case INDINDEXSCALEDOFFSETI2L: + case INDINDEXSCALEDI2L: + case INDINDEXSCALEDOFFSETI2LN: + case INDINDEXSCALEDI2LN: + case INDINDEXOFFSETI2L: + case INDINDEXOFFSETI2LN: + scale = Address::sxtw(size); + break; + default: + scale = Address::lsl(size); + } + + if (index == -1) { + (masm.*insn)(reg, Address(base, disp)); + } else { + if (disp == 0) { + (masm.*insn)(reg, Address(base, as_Register(index), scale)); + } else { + masm.lea(rscratch1, Address(base, disp)); + (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); + } + } + } + + static void loadStore(MacroAssembler masm, mem_float_insn insn, + FloatRegister reg, int opcode, + Register base, int index, int size, int disp) + { + Address::extend scale; + + switch (opcode) { + case INDINDEXSCALEDOFFSETI2L: + case INDINDEXSCALEDI2L: + case INDINDEXSCALEDOFFSETI2LN: + case INDINDEXSCALEDI2LN: + scale = Address::sxtw(size); + break; + default: + scale = Address::lsl(size); + } + + if (index == -1) { + (masm.*insn)(reg, Address(base, disp)); + } else { + if (disp == 0) { + (masm.*insn)(reg, Address(base, as_Register(index), scale)); + } else { + masm.lea(rscratch1, Address(base, disp)); + (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale)); + } + } + } + + static void loadStore(MacroAssembler masm, mem_vector_insn insn, + FloatRegister reg, MacroAssembler::SIMD_RegVariant T, + int opcode, Register base, int index, int size, int disp) + { + if (index == -1) { + (masm.*insn)(reg, T, Address(base, disp)); + } else { + assert(disp == 0, "unsupported address mode"); + (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size))); + } + } + +%} + + + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes are parameterized macros +// used by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. +// +// Instructions specify two basic values for encoding. Again, a +// function is available to check if the constant displacement is an +// oop. They use the ins_encode keyword to specify their encoding +// classes (which must be a sequence of enc_class names, and their +// parameters, specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular +// instruction needs for encoding need to be specified. +encode %{ + // Build emit functions for each basic byte or larger field in the + // intel encoding scheme (opcode, rm, sib, immediate), and call them + // from C++ code in the enc_class source block. 
Emit functions will + // live in the main source block for now. In future, we can + // generalize this by adding a syntax that specifies the sizes of + // fields in an order, so that the adlc can build the emit functions + // automagically + + // catch all for unimplemented encodings + enc_class enc_unimplemented %{ + MacroAssembler _masm(&cbuf); + __ unimplemented("C2 catch all"); + %} + + // BEGIN Non-volatile memory access + + enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrs(vRegF 
dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strb(iRegI src, memory mem) %{ + Register src_reg = as_Register($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strb0(memory mem) %{ + MacroAssembler _masm(&cbuf); + loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strb0_ordered(memory mem) %{ + MacroAssembler _masm(&cbuf); + __ membar(Assembler::StoreStore); + loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strh(iRegI src, memory mem) %{ + Register src_reg = as_Register($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strh0(memory mem) %{ + MacroAssembler _masm(&cbuf); + loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strw(iRegI src, memory mem) %{ + Register src_reg = as_Register($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strw0(memory mem) %{ + MacroAssembler _masm(&cbuf); + loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_str(iRegL src, memory mem) %{ + Register src_reg = as_Register($src$$reg); + // we sometimes get asked to store the stack pointer into the + // current thread -- we cannot do that directly on AArch64 + if (src_reg == r31_sp) { + MacroAssembler _masm(&cbuf); + assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); + __ mov(rscratch2, sp); + src_reg = rscratch2; + } + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, 
$mem$$disp); + %} + + enc_class aarch64_enc_str0(memory mem) %{ + MacroAssembler _masm(&cbuf); + loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strs(vRegF src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strd(vRegD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvD(vecD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvQ(vecX src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + // END Non-volatile memory access + + // this encoding writes the address of the first instruction in the + // call sequence for the runtime call into the anchor pc slot. this + // address allows the runtime to i) locate the code buffer for the + // caller (any address in the buffer would do) and ii) find the oop + // map associated with the call (has to address the instruction + // following the call). note that we have to store the address which + // follows the actual call. + // + // the offset from the current pc can be computed by considering + // what gets generated between this point up to and including the + // call. it looks like this + // + // movz xscratch1 0xnnnn <-- current pc is here + // movk xscratch1 0xnnnn + // movk xscratch1 0xnnnn + // str xscratch1, [xthread,#anchor_pc_off] + // mov xscratch2, sp + // str xscratch2, [xthread,#anchor_sp_off + // mov x0, x1 + // . . . + // mov xn-1, xn + // mov xn, thread <-- always passed + // mov xn+1, rfp <-- optional iff primary == 1 + // movz xscratch1 0xnnnn + // movk xscratch1 0xnnnn + // movk xscratch1 0xnnnn + // blr xscratch1 + // . . . + // + // where the called routine has n args (including the thread and, + // possibly the stub's caller return address currently in rfp). we + // can compute n by looking at the number of args passed into the + // stub. we assert that nargs is < 7. 
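+  //
+  // as a worked example (the figures are assumed purely for
+  // illustration): for a stub taking three java args and no extra rfp
+  // arg, the breakdown below gives 3 + 11 = 14 instruction words,
+  // i.e. (3 + 11) * 4 = 56 bytes from the current pc to the instruction
+  // following the blr.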
+ // + // so the offset we need to add to the pc (in 32-bit words) is + // 3 + <-- load 48-bit constant return pc + // 1 + <-- write anchor pc + // 1 + <-- copy sp + // 1 + <-- write anchor sp + // nargs + <-- java stub arg count + // 1 + <-- extra thread arg + // [ 1 + ] <-- optional ret address of stub caller + // 3 + <-- load 64 bit call target address + // 1 <-- blr instruction + // + // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes + // + + enc_class aarch64_enc_save_pc() %{ + Compile* C = ra_->C; + int nargs = C->tf()->domain()->cnt() - TypeFunc::Parms; + if ($primary) { nargs++; } + assert(nargs <= 8, "opto runtime stub has more than 8 args!"); + MacroAssembler _masm(&cbuf); + address pc = __ pc(); + int call_offset = (nargs + 11) * 4; + int field_offset = in_bytes(JavaThread::frame_anchor_offset()) + + in_bytes(JavaFrameAnchor::last_Java_pc_offset()); + __ lea(rscratch1, InternalAddress(pc + call_offset)); + __ str(rscratch1, Address(rthread, field_offset)); + %} + + // volatile loads and stores + + enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{ + MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlrb); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{ + MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlrh); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{ + MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlrw); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + + enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarb); + __ sxtbw(dst_reg, dst_reg); + %} + + enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarb); + __ sxtb(dst_reg, dst_reg); + %} + + enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarb); + %} + + enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarb); + %} + + enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarh); + __ sxthw(dst_reg, dst_reg); + %} + + enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{ + Register dst_reg = as_Register($dst$$reg); + MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarh); + __ sxth(dst_reg, dst_reg); + %} + + enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarh); + %} + + enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarh); + %} + + enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), 
$mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarw); + %} + + enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarw); + %} + + enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{ + MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldar); + %} + + enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{ + MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldarw); + __ fmovs(as_FloatRegister($dst$$reg), rscratch1); + %} + + enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{ + MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, ldar); + __ fmovd(as_FloatRegister($dst$$reg), rscratch1); + %} + + enc_class aarch64_enc_stlr(iRegL src, memory mem) %{ + Register src_reg = as_Register($src$$reg); + // we sometimes get asked to store the stack pointer into the + // current thread -- we cannot do that directly on AArch64 + if (src_reg == r31_sp) { + MacroAssembler _masm(&cbuf); + assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); + __ mov(rscratch2, sp); + src_reg = rscratch2; + } + MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlr); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{ + { + MacroAssembler _masm(&cbuf); + FloatRegister src_reg = as_FloatRegister($src$$reg); + __ fmovs(rscratch2, src_reg); + } + MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlrw); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{ + { + MacroAssembler _masm(&cbuf); + FloatRegister src_reg = as_FloatRegister($src$$reg); + __ fmovd(rscratch2, src_reg); + } + MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, + rscratch1, stlr); + if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) + __ dmb(__ ISH); + %} + + // synchronized read/update encodings + + enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register base = as_Register($mem$$base); + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + if (index == -1) { + if (disp != 0) { + __ lea(rscratch1, Address(base, disp)); + __ ldaxr(dst_reg, rscratch1); + } else { + // TODO + // should we ever get anything other than this case? + __ ldaxr(dst_reg, base); + } + } else { + Register index_reg = as_Register(index); + if (disp == 0) { + __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale))); + __ ldaxr(dst_reg, rscratch1); + } else { + __ lea(rscratch1, Address(base, disp)); + __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale))); + __ ldaxr(dst_reg, rscratch1); + } + } + %} + + enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{ + MacroAssembler _masm(&cbuf); + Register src_reg = as_Register($src$$reg); + Register base = as_Register($mem$$base); + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + if (index == -1) { + if (disp != 0) { + __ lea(rscratch2, Address(base, disp)); + __ stlxr(rscratch1, src_reg, rscratch2); + } else { + // TODO + // should we ever get anything other than this case? 
+ __ stlxr(rscratch1, src_reg, base); + } + } else { + Register index_reg = as_Register(index); + if (disp == 0) { + __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale))); + __ stlxr(rscratch1, src_reg, rscratch2); + } else { + __ lea(rscratch2, Address(base, disp)); + __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale))); + __ stlxr(rscratch1, src_reg, rscratch2); + } + } + __ cmpw(rscratch1, zr); + %} + + enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, + Assembler::xword, /*acquire*/ false, /*release*/ true); + %} + + enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, + Assembler::word, /*acquire*/ false, /*release*/ true); + %} + + + // The only difference between aarch64_enc_cmpxchg and + // aarch64_enc_cmpxchg_acq is that we use load-acquire in the + // CompareAndSwap sequence to serve as a barrier on acquiring a + // lock. + enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, + Assembler::xword, /*acquire*/ true, /*release*/ true); + %} + + enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, + Assembler::word, /*acquire*/ true, /*release*/ true); + %} + + // auxiliary used for CompareAndSwapX to set result register + enc_class aarch64_enc_cset_eq(iRegINoSp res) %{ + MacroAssembler _masm(&cbuf); + Register res_reg = as_Register($res$$reg); + __ cset(res_reg, Assembler::EQ); + %} + + // prefetch encodings + + enc_class aarch64_enc_prefetchr(memory mem) %{ + MacroAssembler _masm(&cbuf); + Register base = as_Register($mem$$base); + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + if (index == -1) { + __ prfm(Address(base, disp), PLDL1KEEP); + } else { + Register index_reg = as_Register(index); + if (disp == 0) { + __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP); + } else { + __ lea(rscratch1, Address(base, disp)); + __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP); + } + } + %} + + enc_class aarch64_enc_prefetchw(memory mem) %{ + MacroAssembler _masm(&cbuf); + Register base = as_Register($mem$$base); + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + if (index == -1) { + __ prfm(Address(base, disp), PSTL1KEEP); + } else { + Register index_reg = as_Register(index); + if (disp == 0) { + __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP); + } else { + __ lea(rscratch1, Address(base, disp)); + __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP); + } + } + %} + + enc_class aarch64_enc_prefetchnta(memory mem) %{ + MacroAssembler _masm(&cbuf); + Register base = as_Register($mem$$base); + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + if (index == -1) { + __ prfm(Address(base, 
disp), PSTL1STRM); + } else { + Register index_reg = as_Register(index); + if (disp == 0) { + __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM); + __ nop(); + } else { + __ lea(rscratch1, Address(base, disp)); + __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM); + } + } + %} + + /// mov envcodings + + enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{ + MacroAssembler _masm(&cbuf); + u_int32_t con = (u_int32_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); + if (con == 0) { + __ movw(dst_reg, zr); + } else { + __ movw(dst_reg, con); + } + %} + + enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + u_int64_t con = (u_int64_t)$src$$constant; + if (con == 0) { + __ mov(dst_reg, zr); + } else { + __ mov(dst_reg, con); + } + %} + + enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + if (rtype == relocInfo::oop_type) { + __ movoop(dst_reg, (jobject)con, /*immediate*/true); + } else if (rtype == relocInfo::metadata_type) { + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); + if (con < (address)(uintptr_t)os::vm_page_size()) { + __ mov(dst_reg, con); + } else { + unsigned long offset; + __ adrp(dst_reg, con, offset); + __ add(dst_reg, dst_reg, offset); + } + } + } + %} + + enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mov(dst_reg, zr); + %} + + enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mov(dst_reg, (u_int64_t)1); + %} + + enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{ + MacroAssembler _masm(&cbuf); + address page = (address)$src$$constant; + Register dst_reg = as_Register($dst$$reg); + unsigned long off; + __ adrp(dst_reg, Address(page, relocInfo::poll_type), off); + assert(off == 0, "assumed offset == 0"); + %} + + enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{ + MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + assert(rtype == relocInfo::oop_type, "unexpected reloc type"); + __ set_narrow_oop(dst_reg, (jobject)con); + } + %} + + enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mov(dst_reg, zr); + %} + + enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { + ShouldNotReachHere(); + } else { + relocInfo::relocType rtype = $src->constant_reloc(); + assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); + __ set_narrow_klass(dst_reg, (Klass *)con); + } + %} + + // arithmetic encodings + + enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{ + MacroAssembler 
_masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src_reg = as_Register($src1$$reg); + int32_t con = (int32_t)$src2$$constant; + // add has primary == 0, subtract has primary == 1 + if ($primary) { con = -con; } + if (con < 0) { + __ subw(dst_reg, src_reg, -con); + } else { + __ addw(dst_reg, src_reg, con); + } + %} + + enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src_reg = as_Register($src1$$reg); + int32_t con = (int32_t)$src2$$constant; + // add has primary == 0, subtract has primary == 1 + if ($primary) { con = -con; } + if (con < 0) { + __ sub(dst_reg, src_reg, -con); + } else { + __ add(dst_reg, src_reg, con); + } + %} + + enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1); + %} + + enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1); + %} + + enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1); + %} + + enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ + MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1); + %} + + // compare instruction encodings + + enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + Register reg2 = as_Register($src2$$reg); + __ cmpw(reg1, reg2); + %} + + enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{ + MacroAssembler _masm(&cbuf); + Register reg = as_Register($src1$$reg); + int32_t val = $src2$$constant; + if (val >= 0) { + __ subsw(zr, reg, val); + } else { + __ addsw(zr, reg, -val); + } + %} + + enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + u_int32_t val = (u_int32_t)$src2$$constant; + __ movw(rscratch1, val); + __ cmpw(reg1, rscratch1); + %} + + enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + Register reg2 = as_Register($src2$$reg); + __ cmp(reg1, reg2); + %} + + enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{ + MacroAssembler _masm(&cbuf); + Register reg = as_Register($src1$$reg); + int64_t val = $src2$$constant; + if (val >= 0) { + __ subs(zr, reg, val); + } else if (val != -val) { + __ adds(zr, reg, -val); + } else { + // aargh, Long.MIN_VALUE is a special case + __ orr(rscratch1, zr, (u_int64_t)val); + __ subs(zr, reg, rscratch1); + } + %} + + enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + u_int64_t val = 
(u_int64_t)$src2$$constant; + __ mov(rscratch1, val); + __ cmp(reg1, rscratch1); + %} + + enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + Register reg2 = as_Register($src2$$reg); + __ cmp(reg1, reg2); + %} + + enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{ + MacroAssembler _masm(&cbuf); + Register reg1 = as_Register($src1$$reg); + Register reg2 = as_Register($src2$$reg); + __ cmpw(reg1, reg2); + %} + + enc_class aarch64_enc_testp(iRegP src) %{ + MacroAssembler _masm(&cbuf); + Register reg = as_Register($src$$reg); + __ cmp(reg, zr); + %} + + enc_class aarch64_enc_testn(iRegN src) %{ + MacroAssembler _masm(&cbuf); + Register reg = as_Register($src$$reg); + __ cmpw(reg, zr); + %} + + enc_class aarch64_enc_b(label lbl) %{ + MacroAssembler _masm(&cbuf); + Label *L = $lbl$$label; + __ b(*L); + %} + + enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{ + MacroAssembler _masm(&cbuf); + Label *L = $lbl$$label; + __ br ((Assembler::Condition)$cmp$$cmpcode, *L); + %} + + enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{ + MacroAssembler _masm(&cbuf); + Label *L = $lbl$$label; + __ br ((Assembler::Condition)$cmp$$cmpcode, *L); + %} + + enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) + %{ + Register sub_reg = as_Register($sub$$reg); + Register super_reg = as_Register($super$$reg); + Register temp_reg = as_Register($temp$$reg); + Register result_reg = as_Register($result$$reg); + + Label miss; + MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss, + /*set_cond_codes:*/ true); + if ($primary) { + __ mov(result_reg, zr); + } + __ bind(miss); + %} + + enc_class aarch64_enc_java_static_call(method meth) %{ + MacroAssembler _masm(&cbuf); + + address mark = __ pc(); + address addr = (address)$meth$$method; + address call; + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. + call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + } else if (_optimized_virtual) { + call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); + } + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if (_method) { + // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + enc_class aarch64_enc_java_handle_call(method meth) %{ + MacroAssembler _masm(&cbuf); + relocInfo::relocType reloc; + + // RFP is preserved across all calls, even compiled calls. + // Use it to preserve SP. + __ mov(rfp, sp); + + address mark = __ pc(); + address addr = (address)$meth$$method; + address call; + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + } else if (_optimized_virtual) { + call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); + } + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if (_method) { + // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + + // now restore sp + __ mov(sp, rfp); + %} + + enc_class aarch64_enc_java_dynamic_call(method meth) %{ + MacroAssembler _masm(&cbuf); + address call = __ ic_call((address)$meth$$method); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + enc_class aarch64_enc_call_epilog() %{ + MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); + } + %} + + enc_class aarch64_enc_java_to_runtime(method meth) %{ + MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a br (they + // will be in a reachable segment) otherwise we have to use a blr + // which loads the absolute address into a register. + address entry = (address)$meth$$method; + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } else { + Label retaddr; + __ adr(rscratch2, retaddr); + __ lea(rscratch1, RuntimeAddress(entry)); + // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() + __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize))); + __ blr(rscratch1); + __ bind(retaddr); + __ add(sp, sp, 2 * wordSize); + } + %} + + enc_class aarch64_enc_rethrow() %{ + MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class aarch64_enc_ret() %{ + MacroAssembler _masm(&cbuf); + __ ret(lr); + %} + + enc_class aarch64_enc_tail_call(iRegP jump_target) %{ + MacroAssembler _masm(&cbuf); + Register target_reg = as_Register($jump_target$$reg); + __ br(target_reg); + %} + + enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{ + MacroAssembler _masm(&cbuf); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in r0 + // ret addr has been popped into lr + // callee expects it in r3 + __ mov(r3, lr); + __ br(target_reg); + %} + + enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + MacroAssembler _masm(&cbuf); + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); + Register disp_hdr = as_Register($tmp$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + Label cas_failed; + + assert_different_registers(oop, box, tmp, disp_hdr); + + // Load markOop from object into displaced_header. + __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Always do locking in runtime. 
+ if (EmitSync & 0x01) { + __ cmp(oop, zr); + return; + } + + if (UseBiasedLocking && !UseOptoBiasInlining) { + __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont); + } + + // Handle existing monitor + if ((EmitSync & 0x02) == 0) { + __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor); + } + + // Set tmp to be (markOop of object | UNLOCK_VALUE). + __ orr(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Load Compare Value application register. + + // Initialize the box. (Must happen before we update the object mark!) + __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // Compare object markOop with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markOop. + // On failure disp_hdr contains the possibly locked markOop. + if (UseLSE) { + __ mov(disp_hdr, tmp); + __ casal(Assembler::xword, disp_hdr, box, oop); // Updates disp_hdr + __ cmp(tmp, disp_hdr); + __ br(Assembler::EQ, cont); + } else { + Label retry_load; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(oop), PSTL1STRM); + __ bind(retry_load); + __ ldaxr(disp_hdr, oop); + __ cmp(tmp, disp_hdr); + __ br(Assembler::NE, cas_failed); + // use stlxr to ensure update is immediately visible + __ stlxr(disp_hdr, box, oop); + __ cbzw(disp_hdr, cont); + __ b(retry_load); + } + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // If the compare-and-exchange succeeded, then we found an unlocked + // object, will have now locked it will continue at label cont + + __ bind(cas_failed); + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the + // markOop of object (disp_hdr) with the stack pointer. + __ mov(rscratch1, sp); + __ sub(disp_hdr, disp_hdr, rscratch1); + __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If condition is true we are cont and hence we can store 0 as the + // displaced header in the box, which indicates that it is a recursive lock. + __ ands(tmp/*==0?*/, disp_hdr, tmp); + __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + __ b(cont); + + __ bind(object_has_monitor); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. + __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value)); + __ mov(disp_hdr, zr); + + if (UseLSE) { + __ mov(rscratch1, disp_hdr); + __ casal(Assembler::xword, rscratch1, rthread, tmp); + __ cmp(rscratch1, disp_hdr); + } else { + Label retry_load, fail; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(tmp), PSTL1STRM); + __ bind(retry_load); + __ ldaxr(rscratch1, tmp); + __ cmp(disp_hdr, rscratch1); + __ br(Assembler::NE, fail); + // use stlxr to ensure update is immediately visible + __ stlxr(rscratch1, rthread, tmp); + __ cbnzw(rscratch1, retry_load); + __ bind(fail); + } + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for + // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
+ __ mov(tmp, (address)markOopDesc::unused_mark()); + __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + } + + __ bind(cont); + // flag == EQ indicates success + // flag == NE indicates failure + %} + + enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + MacroAssembler _masm(&cbuf); + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); + Register disp_hdr = as_Register($tmp$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr); + + // Always do locking in runtime. + if (EmitSync & 0x01) { + __ cmp(oop, zr); // Oop can't be 0 here => always false. + return; + } + + if (UseBiasedLocking && !UseOptoBiasInlining) { + __ biased_locking_exit(oop, tmp, cont); + } + + // Find the lock address and load the displaced header from the stack. + __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // If the displaced header is 0, we have a recursive unlock. + __ cmp(disp_hdr, zr); + __ br(Assembler::EQ, cont); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); + __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor); + } + + // Check if it is still a light weight lock, this is is true if we + // see the stack address of the basicLock in the markOop of the + // object. + + if (UseLSE) { + __ mov(tmp, box); + __ casl(Assembler::xword, tmp, disp_hdr, oop); + __ cmp(tmp, box); + __ b(cont); + } else { + Label retry_load; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + __ prfm(Address(oop), PSTL1STRM); + __ bind(retry_load); + __ ldxr(tmp, oop); + __ cmp(box, tmp); + __ br(Assembler::NE, cont); + // use stlxr to ensure update is immediately visible + __ stlxr(tmp, disp_hdr, oop); + __ cbzw(tmp, cont); + __ b(retry_load); + } + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + __ bind(object_has_monitor); + __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor + __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner. + __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions + __ cmp(rscratch1, zr); + __ br(Assembler::NE, cont); + + __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); + __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); + __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. + __ cmp(rscratch1, zr); + __ br(Assembler::NE, cont); + // need a release store here + __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ stlr(zr, tmp); // set unowned + } + + __ bind(cont); + // flag == EQ indicates success + // flag == NE indicates failure + %} + +%} + +//----------FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. 
+// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add OptoReg::stack0()) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | | | 3 +// | | +--------+ +// V | | old out| Empty on Intel, window on Sparc +// | old |preserve| Must be even aligned. +// | SP-+--------+----> Matcher::_old_SP, even aligned +// | | in | 3 area for Intel ret address +// Owned by |preserve| Empty on Sparc. +// SELF +--------+ +// | | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> OptoReg::stack0(), even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by +--------+ +// CALLEE | new out| 6 Empty on Intel, window on Sparc +// | new |preserve| Must be even-aligned. +// | SP-+--------+----> Matcher::_new_SP, even aligned +// | | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// (the latter is true on Intel but is it false on AArch64?) +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + +frame %{ + // What direction does stack grow in (assumed to be same for C & Java) + stack_direction(TOWARDS_LOW); + + // These three registers define part of the calling convention + // between compiled code and the interpreter. + + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R12); + + // Method Oop Register when calling interpreter. + interpreter_method_oop_reg(R12); + + // Number of stack slots consumed by locking an object + sync_stack_slots(2); + + // Compiled code's Frame Pointer + frame_pointer(R31); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + interpreter_frame_pointer(R29); + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. 
aarch64 needs two slots for + // return address and fp. + // TODO think this is correct but check + in_preserve_stack_slots(4); + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + // TODO this may well be correct but need to check why that - 2 is there + // ppc port uses 0 but we definitely need to allow for fixed_slots + // which folds in the space used for monitors + return_addr(STACK - 2 + + round_to((Compile::current()->in_preserve_stack_slots() + + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + calling_convention + %{ + // No difference between ingoing/outgoing just pass false + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + c_calling_convention + %{ + // This is obviously always outgoing + (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); + %} + + // Location of compiled Java return values. Same as C for now. + return_value + %{ + // TODO do we allow ideal_reg == Op_RegN??? + assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, + "only return normal values"); + + static const int lo[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + R0_num, // Op_RegN + R0_num, // Op_RegI + R0_num, // Op_RegP + V0_num, // Op_RegF + V0_num, // Op_RegD + R0_num // Op_RegL + }; + + static const int hi[Op_RegL + 1] = { // enum name + 0, // Op_Node + 0, // Op_Set + OptoReg::Bad, // Op_RegN + OptoReg::Bad, // Op_RegI + R0_H_num, // Op_RegP + OptoReg::Bad, // Op_RegF + V0_H_num, // Op_RegD + R0_H_num // Op_RegL + }; + + return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(INSN_COST); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_short_branch(0); // Required flag: is this instruction + // a non-matching short branch variant + // of some long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must + // be a power of 2) specifies the + // alignment that some part of the + // instruction (not necessarily the + // start) requires. 
If > 1, a + // compute_padding() function must be + // provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +//----------Simple Operands---------------------------------------------------- + +// Integer operands 32 bit +// 32 bit immediate +operand immI() +%{ + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit zero +operand immI0() +%{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit unit increment +operand immI_1() +%{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit unit decrement +operand immI_M1() +%{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_le_4() +%{ + predicate(n->get_int() <= 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_31() +%{ + predicate(n->get_int() == 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_8() +%{ + predicate(n->get_int() == 8); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_16() +%{ + predicate(n->get_int() == 16); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() +%{ + predicate(n->get_int() == 24); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32() +%{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_48() +%{ + predicate(n->get_int() == 48); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_56() +%{ + predicate(n->get_int() == 56); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_64() +%{ + predicate(n->get_int() == 64); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_255() +%{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() +%{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_63() +%{ + predicate(n->get_int() == 63); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_255() +%{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_65535() +%{ + predicate(n->get_long() == 65535L); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_4294967295() +%{ + predicate(n->get_long() == 4294967295L); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_bitmask() +%{ + predicate((n->get_long() != 0) + && ((n->get_long() & 0xc000000000000000l) == 0) + && is_power_of_2(n->get_long() + 1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_bitmask() +%{ + predicate((n->get_int() != 0) + && ((n->get_int() & 0xc0000000) == 0) + && is_power_of_2(n->get_int() + 1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 
Scale values for scaled offset addressing modes (up to long but not quad) +operand immIScale() +%{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 26 bit signed offset -- for pc-relative branches +operand immI26() +%{ + predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25))); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 19 bit signed offset -- for pc-relative loads +operand immI19() +%{ + predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18))); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 12 bit unsigned offset -- for base plus immediate loads +operand immIU12() +%{ + predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12))); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLU12() +%{ + predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12))); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Offset for scaled or unscaled immediate loads and stores +operand immIOffset() +%{ + predicate(Address::offset_ok_for_immed(n->get_int())); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset4() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 2)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset8() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immIOffset16() +%{ + predicate(Address::offset_ok_for_immed(n->get_int(), 4)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset() +%{ + predicate(Address::offset_ok_for_immed(n->get_long())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset4() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 2)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset8() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 3)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immLoffset16() +%{ + predicate(Address::offset_ok_for_immed(n->get_long(), 4)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit integer valid for add sub immediate +operand immIAddSub() +%{ + predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit unsigned integer valid for logical immediate +// TODO -- check this is right when e.g the mask is 0x80000000 +operand immILog() +%{ + predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int())); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer operands 64 bit +// 64 bit immediate +operand immL() +%{ + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit zero +operand immL0() +%{ + predicate(n->get_long() == 0); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit unit increment +operand immL_1() +%{ + predicate(n->get_long() == 1); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit unit decrement +operand immL_M1() +%{ + predicate(n->get_long() == -1); + 
match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 32 bit offset of pc in thread anchor + +operand immL_pc_off() +%{ + predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + + in_bytes(JavaFrameAnchor::last_Java_pc_offset())); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit integer valid for add sub immediate +operand immLAddSub() +%{ + predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long())); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// 64 bit integer valid for logical immediate +operand immLLog() +%{ + predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long())); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() +%{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer operands +// Pointer Immediate +operand immP() +%{ + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP0() +%{ + predicate(n->get_ptr() == 0); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate One +// this is used in object initialization (initial object header) +operand immP_1() +%{ + predicate(n->get_ptr() == 1); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Polling Page Pointer Immediate +operand immPollPage() +%{ + predicate((address)n->get_ptr() == os::get_polling_page()); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Card Table Byte Map Base +operand immByteMapBase() +%{ + // Get base of card map + predicate((jbyte*)n->get_ptr() == + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate Minus One +// this is used when we want to write the current PC to the thread anchor +operand immP_M1() +%{ + predicate(n->get_ptr() == -1); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate Minus Two +// this is used when we want to write the current PC to the thread anchor +operand immP_M2() +%{ + predicate(n->get_ptr() == -2); + match(ConP); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float and Double operands +// Double Immediate +operand immD() +%{ + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// constant 'double +0.0'. +operand immD0() +%{ + predicate((n->getd() == 0) && + (fpclassify(n->getd()) == FP_ZERO) && (signbit(n->getd()) == 0)); + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// constant 'double +0.0'. +operand immDPacked() +%{ + predicate(Assembler::operand_valid_for_float_immediate(n->getd())); + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() +%{ + match(ConF); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// constant 'float +0.0'. 
+operand immF0() +%{ + predicate((n->getf() == 0) && + (fpclassify(n->getf()) == FP_ZERO) && (signbit(n->getf()) == 0)); + match(ConF); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// +operand immFPacked() +%{ + predicate(Assembler::operand_valid_for_float_immediate((double)n->getf())); + match(ConF); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Narrow pointer operands +// Narrow Pointer Immediate +operand immN() +%{ + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Narrow NULL Pointer Immediate +operand immN0() +%{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() +%{ + match(ConNKlass); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer 32 bit Register Operands +// Integer 32 bitRegister (excludes SP) +operand iRegI() +%{ + constraint(ALLOC_IN_RC(any_reg32)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 32 bit Register not Special +operand iRegINoSp() +%{ + constraint(ALLOC_IN_RC(no_special_reg32)); + match(RegI); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 64 bit Register Operands +// Integer 64 bit Register (includes SP) +operand iRegL() +%{ + constraint(ALLOC_IN_RC(any_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 64 bit Register not Special +operand iRegLNoSp() +%{ + constraint(ALLOC_IN_RC(no_special_reg)); + match(RegL); + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register Operands +// Pointer Register +operand iRegP() +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + match(iRegPNoSp); + match(iRegP_R0); + //match(iRegP_R2); + //match(iRegP_R4); + //match(iRegP_R5); + match(thread_RegP); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register not Special +operand iRegPNoSp() +%{ + constraint(ALLOC_IN_RC(no_special_ptr_reg)); + match(RegP); + // match(iRegP); + // match(iRegP_R0); + // match(iRegP_R2); + // match(iRegP_R4); + // match(iRegP_R5); + // match(thread_RegP); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R0 only +operand iRegP_R0() +%{ + constraint(ALLOC_IN_RC(r0_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R1 only +operand iRegP_R1() +%{ + constraint(ALLOC_IN_RC(r1_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R2 only +operand iRegP_R2() +%{ + constraint(ALLOC_IN_RC(r2_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R3 only +operand iRegP_R3() +%{ + constraint(ALLOC_IN_RC(r3_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R4 only +operand iRegP_R4() +%{ + constraint(ALLOC_IN_RC(r4_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R5 only +operand iRegP_R5() +%{ + constraint(ALLOC_IN_RC(r5_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register R10 only +operand 
iRegP_R10() +%{ + constraint(ALLOC_IN_RC(r10_reg)); + match(RegP); + // match(iRegP); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Long 64 bit Register R11 only +operand iRegL_R11() +%{ + constraint(ALLOC_IN_RC(r11_reg)); + match(RegL); + match(iRegLNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Pointer 64 bit Register FP only +operand iRegP_FP() +%{ + constraint(ALLOC_IN_RC(fp_reg)); + match(RegP); + // match(iRegP); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R0 only +operand iRegI_R0() +%{ + constraint(ALLOC_IN_RC(int_r0_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R2 only +operand iRegI_R2() +%{ + constraint(ALLOC_IN_RC(int_r2_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Register R3 only +operand iRegI_R3() +%{ + constraint(ALLOC_IN_RC(int_r3_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + + +// Register R2 only +operand iRegI_R4() +%{ + constraint(ALLOC_IN_RC(int_r4_reg)); + match(RegI); + match(iRegINoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + + +// Pointer Register Operands +// Narrow Pointer Register +operand iRegN() +%{ + constraint(ALLOC_IN_RC(any_reg32)); + match(RegN); + match(iRegNNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Integer 64 bit Register not Special +operand iRegNNoSp() +%{ + constraint(ALLOC_IN_RC(no_special_reg32)); + match(RegN); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// heap base register -- used for encoding immN0 + +operand iRegIHeapbase() +%{ + constraint(ALLOC_IN_RC(heapbase_reg)); + match(RegI); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Float Register +// Float register operands +operand vRegF() +%{ + constraint(ALLOC_IN_RC(float_reg)); + match(RegF); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Double Register +// Double register operands +operand vRegD() +%{ + constraint(ALLOC_IN_RC(double_reg)); + match(RegD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vecD() +%{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vecX() +%{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V0() +%{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V1() +%{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V2() +%{ + constraint(ALLOC_IN_RC(v2_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand vRegD_V3() +%{ + constraint(ALLOC_IN_RC(v3_reg)); + match(RegD); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of signed compare instructions + +// note that on AArch64 we also use this register as the output for +// for floating point compare instructions (CmpF CmpD). this ensures +// that ordered inequality tests use GT, GE, LT or LE none of which +// pass through cases where the result is unordered i.e. one or both +// inputs to the compare is a NaN. this means that the ideal code can +// replace e.g. a GT with an LE and not end up capturing the NaN case +// (where the comparison should always fail). 
EQ and NE tests are +// always generated in ideal code so that unordered folds into the NE +// case, matching the behaviour of AArch64 NE. +// +// This differs from x86 where the outputs of FP compares use a +// special FP flags registers and where compares based on this +// register are distinguished into ordered inequalities (cmpOpUCF) and +// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests +// to explicitly handle the unordered case in branches. x86 also has +// to include extra CMoveX rules to accept a cmpOpUCF input. + +operand rFlagsReg() +%{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + op_cost(0); + format %{ "RFLAGS" %} + interface(REG_INTER); +%} + +// Flags register, used as output of unsigned compare instructions +operand rFlagsRegU() +%{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + op_cost(0); + format %{ "RFLAGSU" %} + interface(REG_INTER); +%} + +// Special Registers + +// Method Register +operand inline_cache_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg + match(reg); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand interpreter_method_oop_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg + match(reg); + match(iRegPNoSp); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +// Thread Register +operand thread_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(thread_reg)); // link_reg + match(reg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +operand lr_RegP(iRegP reg) +%{ + constraint(ALLOC_IN_RC(lr_reg)); // link_reg + match(reg); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- + +operand indirect(iRegP reg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(reg); + op_cost(0); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp(0x0); + %} +%} + +operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (LShiftL lreg scale)) off); + op_cost(INSN_COST); + format %{ "$reg, $lreg lsl($scale), $off" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (LShiftL lreg scale)) off); + op_cost(INSN_COST); + format %{ "$reg, $lreg lsl($scale), $off" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (ConvI2L ireg)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + +operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg sxtw($scale), $off I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale($scale); + disp($off); + %} +%} + +operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (LShiftL (ConvI2L ireg) scale)); + op_cost(0); + format %{ 
"$reg, $ireg sxtw($scale), 0, I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale($scale); + disp(0x0); + %} +%} + +operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (LShiftL lreg scale)); + op_cost(0); + format %{ "$reg, $lreg lsl($scale)" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + +operand indIndex(iRegP reg, iRegL lreg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg lreg); + op_cost(0); + format %{ "$reg, $lreg" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffI(iRegP reg, immIOffset off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI4(iRegP reg, immIOffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI8(iRegP reg, immIOffset8 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffI16(iRegP reg, immIOffset16 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL(iRegP reg, immLoffset off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL4(iRegP reg, immLoffset4 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL8(iRegP reg, immLoffset8 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffL16(iRegP reg, immLoffset16 off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indirectN(iRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); + format %{ "[$reg]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp(0x0); + %} +%} + +operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); + op_cost(0); + format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off) +%{ + 
predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); + op_cost(INSN_COST); + format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp($off); + %} +%} + +operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg, $off I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp($off); + %} +%} + +operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off); + op_cost(INSN_COST); + format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale($scale); + disp($off); + %} +%} + +operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)); + op_cost(0); + format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale($scale); + disp(0x0); + %} +%} + +operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) (LShiftL lreg scale)); + op_cost(0); + format %{ "$reg, $lreg lsl($scale)\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + +operand indIndexN(iRegN reg, iRegL lreg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) lreg); + op_cost(0); + format %{ "$reg, $lreg\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffIN(iRegN reg, immIOffset off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); + format %{ "[$reg, $off]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +operand indOffLN(iRegN reg, immLoffset off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); + format %{ "[$reg, $off]\t# narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + + + +// AArch64 opto stubs need to write to the pc slot in the thread anchor +operand thread_anchor_pc(thread_RegP reg, immL_pc_off off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg off); + op_cost(0); + format %{ "[$reg, $off]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. 
+operand stackSlotP(sRegP reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + op_cost(100); + // No match rule because this operand is only generated in matching + // match(RegP); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // RSP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegI); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // RSP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegF); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // RSP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegD); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // RSP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) +%{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + // match(RegL); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1e); // RSP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +// Operands for expressing Control Flow +// NOTE: Label is a predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC. + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. 
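One detail worth noting about the cmpOp and cmpOpU operands defined just below: the hex values in their COND_INTER interfaces are the architectural AArch64 condition-code encodings, so the selected condition can be emitted directly into the cond field of B.cond, CSEL, CSINC and friends. The enum below is an illustrative reference table added for readability, not part of the patch.

#include <cstdint>

// AArch64 condition codes as encoded in the instruction set; the same
// numbers appear in the COND_INTER entries of cmpOp (signed and FP
// comparisons) and cmpOpU (unsigned comparisons) below.
enum AArch64Cond : std::uint8_t {
  EQ = 0x0, NE = 0x1,   // equal / not equal
  HS = 0x2, LO = 0x3,   // unsigned >= / unsigned <      (cmpOpU)
  MI = 0x4, PL = 0x5,   // negative / positive or zero
  VS = 0x6, VC = 0x7,   // overflow / no overflow
  HI = 0x8, LS = 0x9,   // unsigned > / unsigned <=      (cmpOpU)
  GE = 0xa, LT = 0xb,   // signed >= / signed <          (cmpOp)
  GT = 0xc, LE = 0xd    // signed > / signed <=          (cmpOp)
};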
+ +// used for signed integral comparisons and fp comparisons + +operand cmpOp() +%{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); + not_equal(0x1, "ne"); + less(0xb, "lt"); + greater_equal(0xa, "ge"); + less_equal(0xd, "le"); + greater(0xc, "gt"); + overflow(0x6, "vs"); + no_overflow(0x7, "vc"); + %} +%} + +// used for unsigned integral comparisons + +operand cmpOpU() +%{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); + not_equal(0x1, "ne"); + less(0x3, "lo"); + greater_equal(0x2, "hs"); + less_equal(0x9, "ls"); + greater(0x8, "hi"); + overflow(0x6, "vs"); + no_overflow(0x7, "vc"); + %} +%} + +// Special operand allowing long args to int ops to be truncated for free + +operand iRegL2I(iRegL reg) %{ + + op_cost(0); + + match(ConvL2I reg); + + format %{ "l2i($reg)" %} + + interface(REG_INTER) +%} + +opclass vmem4(indirect, indIndex, indOffI4, indOffL4); +opclass vmem8(indirect, indIndex, indOffI8, indOffL8); +opclass vmem16(indirect, indIndex, indOffI16, indOffL16); + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used as to simplify +// instruction definitions by not requiring the AD writer to specify +// separate instructions for every form of operand when the +// instruction accepts multiple operand types with the same basic +// encoding and format. The classic case of this is memory operands. + +// memory is used to define read/write location for load/store +// instruction defs. we can turn a memory op into an Address + +opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL, + indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN); + + // iRegIorL2I is used for src inputs in rules for 32 bit int (I) + + +// iRegIorL2I is used for src inputs in rules for 32 bit int (I) +// operations. it allows the src to be either an iRegI or a (ConvL2I +// iRegL). in the latter case the l2i normally planted for a ConvL2I +// can be elided because the 32-bit instruction will just employ the +// lower 32 bits anyway. +// +// n.b. this does not elide all L2I conversions. if the truncated +// value is consumed by more than one operation then the ConvL2I +// cannot be bundled into the consuming nodes so an l2i gets planted +// (actually a movw $dst $src) and the downstream instructions consume +// the result of the l2i as an iRegI input. That's a shame since the +// movw is actually redundant but its not too costly. + +opclass iRegIorL2I(iRegI, iRegL2I); + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. 
+ +// For specific pipelines, eg A53, define the stages of that pipeline +//pipe_desc(ISS, EX1, EX2, WR); +#define ISS S0 +#define EX1 S1 +#define EX2 S2 +#define WR S3 + +// Integer ALU reg operation +pipeline %{ + +attributes %{ + // ARM instructions are of fixed length + fixed_size_instructions; // Fixed size instructions TODO does + max_instructions_per_bundle = 2; // A53 = 2, A57 = 4 + // ARM instructions come in 32-bit word units + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 64; // The processor fetches one line + instruction_fetch_units = 1; // of 64 bytes + + // List of nop instructions + nops( MachNop ); +%} + +// We don't use an actual pipeline model so don't care about resources +// or description. we do use pipeline classes to introduce fixed +// latencies + +//----------RESOURCES---------------------------------------------------------- +// Resources are the functional units available to the machine + +resources( INS0, INS1, INS01 = INS0 | INS1, + ALU0, ALU1, ALU = ALU0 | ALU1, + MAC, + DIV, + BRANCH, + LDST, + NEON_FP); + +//----------PIPELINE DESCRIPTION----------------------------------------------- +// Pipeline Description specifies the stages in the machine's pipeline + +// Define the pipeline as a generic 6 stage pipeline +pipe_desc(S0, S1, S2, S3, S4, S5); + +//----------PIPELINE CLASSES--------------------------------------------------- +// Pipeline Classes describe the stages in which input and output are +// referenced by the hardware pipeline. + +pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_uop_s(vRegF dst, vRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_uop_d(vRegD dst, vRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_d2f(vRegF dst, vRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_f2d(vRegD dst, vRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_f2i(iRegINoSp dst, vRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_f2l(iRegLNoSp dst, vRegF src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_i2f(vRegF dst, iRegIorL2I src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_l2f(vRegF dst, iRegL src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_d2i(iRegINoSp dst, vRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_d2l(iRegLNoSp dst, vRegD src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_i2d(vRegD dst, iRegIorL2I src) +%{ + single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_l2d(vRegD dst, iRegIorL2I src) +%{ + 
single_instruction; + src : S1(read); + dst : S5(write); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2) +%{ + single_instruction; + src1 : S1(read); + src2 : S2(read); + dst : S5(write); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr) +%{ + single_instruction; + cr : S1(read); + src1 : S1(read); + src2 : S1(read); + dst : S3(write); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr) +%{ + single_instruction; + cr : S1(read); + src1 : S1(read); + src2 : S1(read); + dst : S3(write); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class fp_imm_s(vRegF dst) +%{ + single_instruction; + dst : S3(write); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class fp_imm_d(vRegD dst) +%{ + single_instruction; + dst : S3(write); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class fp_load_constant_s(vRegF dst) +%{ + single_instruction; + dst : S4(write); + INS01 : ISS; + NEON_FP : S4; +%} + +pipe_class fp_load_constant_d(vRegD dst) +%{ + single_instruction; + dst : S4(write); + INS01 : ISS; + NEON_FP : S4; +%} + +pipe_class vmul64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class vmul128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vmla64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + dst : S1(read); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class vmla128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + dst : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vdop64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S4(write); + src1 : S2(read); + src2 : S2(read); + INS01 : ISS; + NEON_FP : S4; +%} + +pipe_class vdop128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S4(write); + src1 : S2(read); + src2 : S2(read); + INS0 : ISS; + NEON_FP : S4; +%} + +pipe_class vlogical64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S3(write); + src1 : S2(read); + src2 : S2(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vlogical128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S3(write); + src1 : S2(read); + src2 : S2(read); + INS0 : ISS; + NEON_FP : S3; +%} + +pipe_class vshift64(vecD dst, vecD src, vecX shift) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + shift : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vshift128(vecX dst, vecX src, vecX shift) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + shift : S1(read); + INS0 : ISS; + NEON_FP : S3; +%} + +pipe_class vshift64_imm(vecD dst, vecD src, immI shift) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vshift128_imm(vecX dst, vecX src, immI shift) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS0 : ISS; + NEON_FP : S3; +%} + +pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : 
S1(read); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2) +%{ + single_instruction; + dst : S5(write); + src1 : S1(read); + src2 : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vsqrt_fp128(vecX dst, vecX src) +%{ + single_instruction; + dst : S5(write); + src : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vunop_fp64(vecD dst, vecD src) +%{ + single_instruction; + dst : S5(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S5; +%} + +pipe_class vunop_fp128(vecX dst, vecX src) +%{ + single_instruction; + dst : S5(write); + src : S1(read); + INS0 : ISS; + NEON_FP : S5; +%} + +pipe_class vdup_reg_reg64(vecD dst, iRegI src) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vdup_reg_reg128(vecX dst, iRegI src) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vdup_reg_freg64(vecD dst, vRegF src) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vdup_reg_freg128(vecX dst, vRegF src) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vdup_reg_dreg128(vecX dst, vRegD src) +%{ + single_instruction; + dst : S3(write); + src : S1(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vmovi_reg_imm64(vecD dst) +%{ + single_instruction; + dst : S3(write); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vmovi_reg_imm128(vecX dst) +%{ + single_instruction; + dst : S3(write); + INS0 : ISS; + NEON_FP : S3; +%} + +pipe_class vload_reg_mem64(vecD dst, vmem8 mem) +%{ + single_instruction; + dst : S5(write); + mem : ISS(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vload_reg_mem128(vecX dst, vmem16 mem) +%{ + single_instruction; + dst : S5(write); + mem : ISS(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vstore_reg_mem64(vecD src, vmem8 mem) +%{ + single_instruction; + mem : ISS(read); + src : S2(read); + INS01 : ISS; + NEON_FP : S3; +%} + +pipe_class vstore_reg_mem128(vecD src, vmem16 mem) +%{ + single_instruction; + mem : ISS(read); + src : S2(read); + INS01 : ISS; + NEON_FP : S3; +%} + +//------- Integer ALU operations -------------------------- + +// Integer ALU reg-reg operation +// Operands needed in EX1, result generated in EX2 +// Eg. ADD x0, x1, x2 +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : EX1(read); + INS01 : ISS; // Dual issue as instruction 0 or 1 + ALU : EX2; +%} + +// Integer ALU reg-reg operation with constant shift +// Shifted register must be available in LATE_ISS instead of EX1 +// Eg. ADD x0, x1, x2, LSL #2 +pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg operation with constant shift +// Eg. 
LSL x0, x1, #shift +pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg-reg operation with variable shift +// Both operands must be available in LATE_ISS instead of EX1 +// Result is available in EX1 instead of EX2 +// Eg. LSLV x0, x1, x2 +pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX1(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + ALU : EX1; +%} + +// Integer ALU reg-reg operation with extract +// As for _vshift above, but result generated in EX2 +// Eg. EXTR x0, x1, x2, #N +pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : EX2(write); + src1 : ISS(read); + src2 : ISS(read); + INS1 : ISS; // Can only dual issue as Instruction 1 + ALU : EX1; +%} + +// Integer ALU reg operation +// Eg. NEG x0, x1 +pipe_class ialu_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : EX2(write); + src : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU reg mmediate operation +// Eg. ADD x0, x1, #N +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ + single_instruction; + dst : EX2(write); + src1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Integer ALU immediate operation (no source operands) +// Eg. MOV x0, #N +pipe_class ialu_imm(iRegI dst) +%{ + single_instruction; + dst : EX1(write); + INS01 : ISS; + ALU : EX1; +%} + +//------- Compare operation ------------------------------- + +// Compare reg-reg +// Eg. CMP x0, x1 +pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + op2 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +// Compare reg-reg +// Eg. CMP x0, #N +pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1) +%{ + single_instruction; +// fixed_latency(16); + cr : EX2(write); + op1 : EX1(read); + INS01 : ISS; + ALU : EX2; +%} + +//------- Conditional instructions ------------------------ + +// Conditional no operands +// Eg. CSINC x0, zr, zr, +pipe_class icond_none(iRegI dst, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 2 operand +// EG. CSEL X0, X1, X2, +pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src1 : EX1(read); + src2 : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +// Conditional 2 operand +// EG. CSEL X0, X1, X2, +pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + src : EX1(read); + dst : EX2(write); + INS01 : ISS; + ALU : EX2; +%} + +//------- Multiply pipeline operations -------------------- + +// Multiply reg-reg +// Eg. MUL w0, w1, w2 +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD w0, w1, w2, w3 +pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Eg. 
MUL w0, w1, w2 +pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +// Multiply accumulate +// Eg. MADD w0, w1, w2, w3 +pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) +%{ + single_instruction; + fixed_latency(3); // Maximum latency for 64 bit mul + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + src3 : ISS(read); + INS01 : ISS; + MAC : WR; +%} + +//------- Divide pipeline operations -------------------- + +// Eg. SDIV w0, w1, w2 +pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(8); // Maximum latency for 32 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +// Eg. SDIV x0, x1, x2 +pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ + single_instruction; + fixed_latency(16); // Maximum latency for 64 bit divide + dst : WR(write); + src1 : ISS(read); + src2 : ISS(read); + INS0 : ISS; // Can only dual issue as instruction 0 + DIV : WR; +%} + +//------- Load pipeline operations ------------------------ + +// Load - prefetch +// Eg. PFRM +pipe_class iload_prefetch(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, mem +// Eg. LDR x0, +pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ + single_instruction; + dst : WR(write); + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Load - reg, reg +// Eg. LDR x0, [sp, x1] +pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : WR(write); + src : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Store pipeline operations ----------------------- + +// Store - zr, mem +// Eg. STR zr, +pipe_class istore_mem(memory mem) +%{ + single_instruction; + mem : ISS(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, mem +// Eg. STR x0, +pipe_class istore_reg_mem(iRegI src, memory mem) +%{ + single_instruction; + mem : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +// Store - reg, reg +// Eg. STR x0, [sp, x1] +pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ + single_instruction; + dst : ISS(read); + src : EX2(read); + INS01 : ISS; + LDST : WR; +%} + +//------- Store pipeline operations ----------------------- + +// Branch +pipe_class pipe_branch() +%{ + single_instruction; + INS01 : ISS; + BRANCH : EX1; +%} + +// Conditional branch +pipe_class pipe_branch_cond(rFlagsReg cr) +%{ + single_instruction; + cr : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +// Compare & Branch +// EG. CBZ/CBNZ +pipe_class pipe_cmp_branch(iRegI op1) +%{ + single_instruction; + op1 : EX1(read); + INS01 : ISS; + BRANCH : EX1; +%} + +//------- Synchronisation operations ---------------------- + +// Any operation requiring serialization. +// EG. 
DMB/Atomic Ops/Load Acquire/Str Release +pipe_class pipe_serial() +%{ + single_instruction; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + +// Generic big/slow expanded idiom - also serialized +pipe_class pipe_slow() +%{ + instruction_count(10); + multiple_bundles; + force_serialization; + fixed_latency(16); + INS01 : ISS(2); // Cannot dual issue with any other instruction + LDST : WR; +%} + +// Empty pipeline class +pipe_class pipe_class_empty() +%{ + single_instruction; + fixed_latency(0); +%} + +// Default pipeline class. +pipe_class pipe_class_default() +%{ + single_instruction; + fixed_latency(2); +%} + +// Pipeline class for compares. +pipe_class pipe_class_compare() +%{ + single_instruction; + fixed_latency(16); +%} + +// Pipeline class for memory operations. +pipe_class pipe_class_memory() +%{ + single_instruction; + fixed_latency(16); +%} + +// Pipeline class for call. +pipe_class pipe_class_call() +%{ + single_instruction; + fixed_latency(100); +%} + +// Define the class for the Nop node. +define %{ + MachNop = pipe_class_empty; +%} + +%} +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// rrspectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. 
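The ins_cost attribute described above is what instruction selection minimizes: of all rules whose match pattern covers an ideal subtree (and whose predicate holds), the matcher keeps the cheapest one. The toy C++ sketch below only illustrates that idea; Rule and pick_rule are invented for the example and bear no relation to the generated matcher.

#include <vector>

// Minimal illustration of cost-driven selection: keep the cheapest rule
// that both matches the subtree and passes its predicate.
struct Rule {
  const char* name;
  int         ins_cost;   // e.g. 4 * INSN_COST for the memory loads below
  bool        applicable; // match pattern fits and predicate() returned true
};

static const Rule* pick_rule(const std::vector<Rule>& candidates) {
  const Rule* best = nullptr;
  for (const Rule& r : candidates) {
    if (r.applicable && (best == nullptr || r.ins_cost < best->ins_cost)) {
      best = &r;
    }
  }
  return best; // nullptr means no rule matched
}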
+ +// ============================================================================ +// Memory (Load/Store) Instructions + +// Load Instructions + +// Load Byte (8 bit signed) +instruct loadB(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadB mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrsbw $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldrsbw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit signed) into long +instruct loadB2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadB mem))); + predicate(!needs_acquiring_load(n->in(1))); + + ins_cost(4 * INSN_COST); + format %{ "ldrsb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldrsb(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit unsigned) +instruct loadUB(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadUB mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrbw $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldrb(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Byte (8 bit unsigned) into long +instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + predicate(!needs_acquiring_load(n->in(1))); + + ins_cost(4 * INSN_COST); + format %{ "ldrb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldrb(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Short (16 bit signed) +instruct loadS(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadS mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrshw $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldrshw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Short (16 bit signed) into long +instruct loadS2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadS mem))); + predicate(!needs_acquiring_load(n->in(1))); + + ins_cost(4 * INSN_COST); + format %{ "ldrsh $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldrsh(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Char (16 bit unsigned) +instruct loadUS(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadUS mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrh $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldrh(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Short/Char (16 bit unsigned) into long +instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + predicate(!needs_acquiring_load(n->in(1))); + + ins_cost(4 * INSN_COST); + format %{ "ldrh $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldrh(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit signed) +instruct loadI(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadI mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldrw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit signed) into long +instruct loadI2L(iRegLNoSp dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadI mem))); + predicate(!needs_acquiring_load(n->in(1))); + + ins_cost(4 * INSN_COST); + format %{ "ldrsw $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldrsw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Integer (32 bit unsigned) into long +instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load())); + + ins_cost(4 * INSN_COST); + format %{ 
"ldrw $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldrw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Long (64 bit signed) +instruct loadL(iRegLNoSp dst, memory mem) +%{ + match(Set dst (LoadL mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldr(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Range +instruct loadRange(iRegINoSp dst, memory mem) +%{ + match(Set dst (LoadRange mem)); + + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# range" %} + + ins_encode(aarch64_enc_ldrw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Pointer +instruct loadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# ptr" %} + + ins_encode(aarch64_enc_ldr(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Compressed Pointer +instruct loadN(iRegNNoSp dst, memory mem) +%{ + match(Set dst (LoadN mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} + + ins_encode(aarch64_enc_ldrw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Klass Pointer +instruct loadKlass(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadKlass mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# class" %} + + ins_encode(aarch64_enc_ldr(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Narrow Klass Pointer +instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed class ptr" %} + + ins_encode(aarch64_enc_ldrw(dst, mem)); + + ins_pipe(iload_reg_mem); +%} + +// Load Float +instruct loadF(vRegF dst, memory mem) +%{ + match(Set dst (LoadF mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst, $mem\t# float" %} + + ins_encode( aarch64_enc_ldrs(dst, mem) ); + + ins_pipe(pipe_class_memory); +%} + +// Load Double +instruct loadD(vRegD dst, memory mem) +%{ + match(Set dst (LoadD mem)); + predicate(!needs_acquiring_load(n)); + + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst, $mem\t# double" %} + + ins_encode( aarch64_enc_ldrd(dst, mem) ); + + ins_pipe(pipe_class_memory); +%} + + +// Load Int Constant +instruct loadConI(iRegINoSp dst, immI src) +%{ + match(Set dst src); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# int" %} + + ins_encode( aarch64_enc_movw_imm(dst, src) ); + + ins_pipe(ialu_imm); +%} + +// Load Long Constant +instruct loadConL(iRegLNoSp dst, immL src) +%{ + match(Set dst src); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# long" %} + + ins_encode( aarch64_enc_mov_imm(dst, src) ); + + ins_pipe(ialu_imm); +%} + +// Load Pointer Constant + +instruct loadConP(iRegPNoSp dst, immP con) +%{ + match(Set dst con); + + ins_cost(INSN_COST * 4); + format %{ + "mov $dst, $con\t# ptr\n\t" + %} + + ins_encode(aarch64_enc_mov_p(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Null Pointer Constant + +instruct loadConP0(iRegPNoSp dst, immP0 con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "mov $dst, $con\t# NULL ptr" %} + + ins_encode(aarch64_enc_mov_p0(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Pointer Constant One + +instruct loadConP1(iRegPNoSp dst, immP_1 con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "mov $dst, $con\t# NULL 
ptr" %} + + ins_encode(aarch64_enc_mov_p1(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Poll Page Constant + +instruct loadConPollPage(iRegPNoSp dst, immPollPage con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "adr $dst, $con\t# Poll Page Ptr" %} + + ins_encode(aarch64_enc_mov_poll_page(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Byte Map Base Constant + +instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "adr $dst, $con\t# Byte Map Base" %} + + ins_encode(aarch64_enc_mov_byte_map_base(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Pointer Constant + +instruct loadConN(iRegNNoSp dst, immN con) +%{ + match(Set dst con); + + ins_cost(INSN_COST * 4); + format %{ "mov $dst, $con\t# compressed ptr" %} + + ins_encode(aarch64_enc_mov_n(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Null Pointer Constant + +instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "mov $dst, $con\t# compressed NULL ptr" %} + + ins_encode(aarch64_enc_mov_n0(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Narrow Klass Constant + +instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ + match(Set dst con); + + ins_cost(INSN_COST); + format %{ "mov $dst, $con\t# compressed klass ptr" %} + + ins_encode(aarch64_enc_mov_nk(dst, con)); + + ins_pipe(ialu_imm); +%} + +// Load Packed Float Constant + +instruct loadConF_packed(vRegF dst, immFPacked con) %{ + match(Set dst con); + ins_cost(INSN_COST * 4); + format %{ "fmovs $dst, $con"%} + ins_encode %{ + __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant); + %} + + ins_pipe(fp_imm_s); +%} + +// Load Float Constant + +instruct loadConF(vRegF dst, immF con) %{ + match(Set dst con); + + ins_cost(INSN_COST * 4); + + format %{ + "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t" + %} + + ins_encode %{ + __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + + ins_pipe(fp_load_constant_s); +%} + +// Load Packed Double Constant + +instruct loadConD_packed(vRegD dst, immDPacked con) %{ + match(Set dst con); + ins_cost(INSN_COST); + format %{ "fmovd $dst, $con"%} + ins_encode %{ + __ fmovd(as_FloatRegister($dst$$reg), $con$$constant); + %} + + ins_pipe(fp_imm_d); +%} + +// Load Double Constant + +instruct loadConD(vRegD dst, immD con) %{ + match(Set dst con); + + ins_cost(INSN_COST * 5); + format %{ + "ldrd $dst, [$constantaddress]\t# load from constant table: float=$con\n\t" + %} + + ins_encode %{ + __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + + ins_pipe(fp_load_constant_d); +%} + +// Store Instructions + +// Store CMS card-mark Immediate +instruct storeimmCM0(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); + predicate(unnecessary_storestore(n)); + + ins_cost(INSN_COST); + format %{ "strb zr, $mem\t# byte" %} + + ins_encode(aarch64_enc_strb0(mem)); + + ins_pipe(istore_mem); +%} + +// Store CMS card-mark Immediate with intervening StoreStore +// needed when using CMS with no conditional card marking +instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); + + ins_cost(INSN_COST * 2); + format %{ "dmb ishst" + "\n\tstrb zr, $mem\t# byte" %} + + ins_encode(aarch64_enc_strb0_ordered(mem)); + + ins_pipe(istore_mem); +%} + +// Store Byte +instruct storeB(iRegIorL2I src, memory mem) +%{ + match(Set mem (StoreB mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + 
format %{ "strb $src, $mem\t# byte" %} + + ins_encode(aarch64_enc_strb(src, mem)); + + ins_pipe(istore_reg_mem); +%} + + +instruct storeimmB0(immI0 zero, memory mem) +%{ + match(Set mem (StoreB mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strb zr, $mem\t# byte" %} + + ins_encode(aarch64_enc_strb0(mem)); + + ins_pipe(istore_mem); +%} + +// Store Char/Short +instruct storeC(iRegIorL2I src, memory mem) +%{ + match(Set mem (StoreC mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strh $src, $mem\t# short" %} + + ins_encode(aarch64_enc_strh(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +instruct storeimmC0(immI0 zero, memory mem) +%{ + match(Set mem (StoreC mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strh zr, $mem\t# short" %} + + ins_encode(aarch64_enc_strh0(mem)); + + ins_pipe(istore_mem); +%} + +// Store Integer + +instruct storeI(iRegIorL2I src, memory mem) +%{ + match(Set mem(StoreI mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# int" %} + + ins_encode(aarch64_enc_strw(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +instruct storeimmI0(immI0 zero, memory mem) +%{ + match(Set mem(StoreI mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strw zr, $mem\t# int" %} + + ins_encode(aarch64_enc_strw0(mem)); + + ins_pipe(istore_mem); +%} + +// Store Long (64 bit signed) +instruct storeL(iRegL src, memory mem) +%{ + match(Set mem (StoreL mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# int" %} + + ins_encode(aarch64_enc_str(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +// Store Long (64 bit signed) +instruct storeimmL0(immL0 zero, memory mem) +%{ + match(Set mem (StoreL mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "str zr, $mem\t# int" %} + + ins_encode(aarch64_enc_str0(mem)); + + ins_pipe(istore_mem); +%} + +// Store Pointer +instruct storeP(iRegP src, memory mem) +%{ + match(Set mem (StoreP mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "str $src, $mem\t# ptr" %} + + ins_encode(aarch64_enc_str(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +// Store Pointer +instruct storeimmP0(immP0 zero, memory mem) +%{ + match(Set mem (StoreP mem zero)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "str zr, $mem\t# ptr" %} + + ins_encode(aarch64_enc_str0(mem)); + + ins_pipe(istore_mem); +%} + +// Store Compressed Pointer +instruct storeN(iRegN src, memory mem) +%{ + match(Set mem (StoreN mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# compressed ptr" %} + + ins_encode(aarch64_enc_strw(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ + match(Set mem (StoreN mem zero)); + predicate(Universe::narrow_oop_base() == NULL && + Universe::narrow_klass_base() == NULL && + (!needs_releasing_store(n))); + + ins_cost(INSN_COST); + format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %} + + ins_encode(aarch64_enc_strw(heapbase, mem)); + + ins_pipe(istore_reg_mem); +%} + +// Store Float +instruct storeF(vRegF src, memory mem) +%{ + match(Set mem (StoreF mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strs $src, $mem\t# float" %} + + 
ins_encode( aarch64_enc_strs(src, mem) ); + + ins_pipe(pipe_class_memory); +%} + +// TODO +// implement storeImmF0 and storeFImmPacked + +// Store Double +instruct storeD(vRegD src, memory mem) +%{ + match(Set mem (StoreD mem src)); + predicate(!needs_releasing_store(n)); + + ins_cost(INSN_COST); + format %{ "strd $src, $mem\t# double" %} + + ins_encode( aarch64_enc_strd(src, mem) ); + + ins_pipe(pipe_class_memory); +%} + +// Store Compressed Klass Pointer +instruct storeNKlass(iRegN src, memory mem) +%{ + predicate(!needs_releasing_store(n)); + match(Set mem (StoreNKlass mem src)); + + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# compressed klass ptr" %} + + ins_encode(aarch64_enc_strw(src, mem)); + + ins_pipe(istore_reg_mem); +%} + +// TODO +// implement storeImmD0 and storeDImmPacked + +// prefetch instructions +// Must be safe to execute with invalid address (cannot fault). + +instruct prefetchr( memory mem ) %{ + match(PrefetchRead mem); + + ins_cost(INSN_COST); + format %{ "prfm $mem, PLDL1KEEP\t# Prefetch into level 1 cache read keep" %} + + ins_encode( aarch64_enc_prefetchr(mem) ); + + ins_pipe(iload_prefetch); +%} + +instruct prefetchw( memory mem ) %{ + match(PrefetchAllocation mem); + + ins_cost(INSN_COST); + format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %} + + ins_encode( aarch64_enc_prefetchw(mem) ); + + ins_pipe(iload_prefetch); +%} + +instruct prefetchnta( memory mem ) %{ + match(PrefetchWrite mem); + + ins_cost(INSN_COST); + format %{ "prfm $mem, PSTL1STRM\t# Prefetch into level 1 cache write streaming" %} + + ins_encode( aarch64_enc_prefetchnta(mem) ); + + ins_pipe(iload_prefetch); +%} + +// ---------------- volatile loads and stores ---------------- + +// Load Byte (8 bit signed) +instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadB mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarsb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldarsb(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Byte (8 bit signed) into long +instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarsb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldarsb(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Byte (8 bit unsigned) +instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadUB mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldarb(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Byte (8 bit unsigned) into long +instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarb $dst, $mem\t# byte" %} + + ins_encode(aarch64_enc_ldarb(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Short (16 bit signed) +instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadS mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarshw $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldarshw(dst, mem)); + + ins_pipe(pipe_serial); +%} + +instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadUS mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarhw $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldarhw(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Short/Char (16 bit unsigned) into long +instruct 
loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarh $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldarh(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Short/Char (16 bit signed) into long +instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarh $dst, $mem\t# short" %} + + ins_encode(aarch64_enc_ldarsh(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Integer (32 bit signed) +instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadI mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarw $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldarw(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Integer (32 bit unsigned) into long +instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarw $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldarw(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Long (64 bit signed) +instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadL mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldar $dst, $mem\t# int" %} + + ins_encode(aarch64_enc_ldar(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Pointer +instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadP mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldar $dst, $mem\t# ptr" %} + + ins_encode(aarch64_enc_ldar(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Compressed Pointer +instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldarw $dst, $mem\t# compressed ptr" %} + + ins_encode(aarch64_enc_ldarw(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Load Float +instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadF mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldars $dst, $mem\t# float" %} + + ins_encode( aarch64_enc_fldars(dst, mem) ); + + ins_pipe(pipe_serial); +%} + +// Load Double +instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem) +%{ + match(Set dst (LoadD mem)); + + ins_cost(VOLATILE_REF_COST); + format %{ "ldard $dst, $mem\t# double" %} + + ins_encode( aarch64_enc_fldard(dst, mem) ); + + ins_pipe(pipe_serial); +%} + +// Store Byte +instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreB mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrb $src, $mem\t# byte" %} + + ins_encode(aarch64_enc_stlrb(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Char/Short +instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreC mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrh $src, $mem\t# short" %} + + ins_encode(aarch64_enc_stlrh(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Integer + +instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem) +%{ + match(Set mem(StoreI mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrw $src, $mem\t# int" %} + + ins_encode(aarch64_enc_stlrw(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Long (64 bit signed) +instruct storeL_volatile(iRegL src, /* 
sync_memory*/indirect mem) +%{ + match(Set mem (StoreL mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlr $src, $mem\t# int" %} + + ins_encode(aarch64_enc_stlr(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Pointer +instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreP mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlr $src, $mem\t# ptr" %} + + ins_encode(aarch64_enc_stlr(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Compressed Pointer +instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrw $src, $mem\t# compressed ptr" %} + + ins_encode(aarch64_enc_stlrw(src, mem)); + + ins_pipe(pipe_class_memory); +%} + +// Store Float +instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreF mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrs $src, $mem\t# float" %} + + ins_encode( aarch64_enc_fstlrs(src, mem) ); + + ins_pipe(pipe_class_memory); +%} + +// TODO +// implement storeImmF0 and storeFImmPacked + +// Store Double +instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem) +%{ + match(Set mem (StoreD mem src)); + + ins_cost(VOLATILE_REF_COST); + format %{ "stlrd $src, $mem\t# double" %} + + ins_encode( aarch64_enc_fstlrd(src, mem) ); + + ins_pipe(pipe_class_memory); +%} + +// ---------------- end of volatile loads and stores ---------------- + +// ============================================================================ +// BSWAP Instructions + +instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesI src)); + + ins_cost(INSN_COST); + format %{ "revw $dst, $src" %} + + ins_encode %{ + __ revw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); + + ins_cost(INSN_COST); + format %{ "rev $dst, $src" %} + + ins_encode %{ + __ rev(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesUS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src" %} + + ins_encode %{ + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (ReverseBytesS src)); + + ins_cost(INSN_COST); + format %{ "rev16w $dst, $src\n\t" + "sbfmw $dst, $dst, #0, #15" %} + + ins_encode %{ + __ rev16w(as_Register($dst$$reg), as_Register($src$$reg)); + __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U); + %} + + ins_pipe(ialu_reg); +%} + +// ============================================================================ +// Zero Count Instructions + +instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (CountLeadingZerosI src)); + + ins_cost(INSN_COST); + format %{ "clzw $dst, $src" %} + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{ + match(Set dst (CountLeadingZerosL src)); + + ins_cost(INSN_COST); + format %{ "clz $dst, $src" %} + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst 
(CountTrailingZerosI src)); + + ins_cost(INSN_COST * 2); + format %{ "rbitw $dst, $src\n\t" + "clzw $dst, $dst" %} + ins_encode %{ + __ rbitw(as_Register($dst$$reg), as_Register($src$$reg)); + __ clzw(as_Register($dst$$reg), as_Register($dst$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{ + match(Set dst (CountTrailingZerosL src)); + + ins_cost(INSN_COST * 2); + format %{ "rbit $dst, $src\n\t" + "clz $dst, $dst" %} + ins_encode %{ + __ rbit(as_Register($dst$$reg), as_Register($src$$reg)); + __ clz(as_Register($dst$$reg), as_Register($dst$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +//---------- Population Count Instructions ------------------------------------- +// + +instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "movw $src, $src\n\t" + "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrs $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// Note: Long.bitCount(long) returns an int. 
+instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL (LoadL mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrd $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// ============================================================================ +// MemBar Instruction + +instruct load_fence() %{ + match(LoadFence); + ins_cost(VOLATILE_REF_COST); + + format %{ "load_fence" %} + + ins_encode %{ + __ membar(Assembler::LoadLoad|Assembler::LoadStore); + %} + ins_pipe(pipe_serial); +%} + +instruct unnecessary_membar_acquire() %{ + predicate(unnecessary_acquire(n)); + match(MemBarAcquire); + ins_cost(0); + + format %{ "membar_acquire (elided)" %} + + ins_encode %{ + __ block_comment("membar_acquire (elided)"); + %} + + ins_pipe(pipe_class_empty); +%} + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(VOLATILE_REF_COST); + + format %{ "membar_acquire" %} + + ins_encode %{ + __ block_comment("membar_acquire"); + __ membar(Assembler::LoadLoad|Assembler::LoadStore); + %} + + ins_pipe(pipe_serial); +%} + + +instruct membar_acquire_lock() %{ + match(MemBarAcquireLock); + ins_cost(VOLATILE_REF_COST); + + format %{ "membar_acquire_lock (elided)" %} + + ins_encode %{ + __ block_comment("membar_acquire_lock (elided)"); + %} + + ins_pipe(pipe_serial); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(VOLATILE_REF_COST); + + format %{ "store_fence" %} + + ins_encode %{ + __ membar(Assembler::LoadStore|Assembler::StoreStore); + %} + ins_pipe(pipe_serial); +%} + +instruct unnecessary_membar_release() %{ + predicate(unnecessary_release(n)); + match(MemBarRelease); + ins_cost(0); + + format %{ "membar_release (elided)" %} + + ins_encode %{ + __ block_comment("membar_release (elided)"); + %} + ins_pipe(pipe_serial); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(VOLATILE_REF_COST); + + format %{ "membar_release" %} + + ins_encode %{ + __ block_comment("membar_release"); + __ membar(Assembler::LoadStore|Assembler::StoreStore); + %} + ins_pipe(pipe_serial); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(VOLATILE_REF_COST); + + format %{ "MEMBAR-store-store" %} + + ins_encode %{ + __ membar(Assembler::StoreStore); + %} + ins_pipe(pipe_serial); +%} + +instruct membar_release_lock() %{ + 
match(MemBarReleaseLock); + ins_cost(VOLATILE_REF_COST); + + format %{ "membar_release_lock (elided)" %} + + ins_encode %{ + __ block_comment("membar_release_lock (elided)"); + %} + + ins_pipe(pipe_serial); +%} + +instruct unnecessary_membar_volatile() %{ + predicate(unnecessary_volatile(n)); + match(MemBarVolatile); + ins_cost(0); + + format %{ "membar_volatile (elided)" %} + + ins_encode %{ + __ block_comment("membar_volatile (elided)"); + %} + + ins_pipe(pipe_serial); +%} + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(VOLATILE_REF_COST*100); + + format %{ "membar_volatile" %} + + ins_encode %{ + __ block_comment("membar_volatile"); + __ membar(Assembler::StoreLoad); + %} + + ins_pipe(pipe_serial); +%} + +// ============================================================================ +// Cast/Convert Instructions + +instruct castX2P(iRegPNoSp dst, iRegL src) %{ + match(Set dst (CastX2P src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# long -> ptr" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + +instruct castP2X(iRegLNoSp dst, iRegP src) %{ + match(Set dst (CastP2X src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# ptr -> long" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + +// Convert oop into int for vectors alignment masking +instruct convP2I(iRegINoSp dst, iRegP src) %{ + match(Set dst (ConvL2I (CastP2X src))); + + ins_cost(INSN_COST); + format %{ "movw $dst, $src\t# ptr -> int" %} + ins_encode %{ + __ movw($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +// Convert compressed oop into int for vectors alignment masking +// in case of 32bit oops (heap < 4Gb). 
+instruct convN2I(iRegINoSp dst, iRegN src) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# compressed ptr -> int" %} + ins_encode %{ + __ movw($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + + +// Convert oop pointer into compressed form +instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + effect(KILL cr); + ins_cost(INSN_COST * 3); + format %{ "encode_heap_oop $dst, $src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + __ encode_heap_oop(d, s); + %} + ins_pipe(ialu_reg); +%} + +instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + ins_cost(INSN_COST * 3); + format %{ "encode_heap_oop_not_null $dst, $src" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + ins_cost(INSN_COST * 3); + format %{ "decode_heap_oop $dst, $src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + __ decode_heap_oop(d, s); + %} + ins_pipe(ialu_reg); +%} + +instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + ins_cost(INSN_COST * 3); + format %{ "decode_heap_oop_not_null $dst, $src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + __ decode_heap_oop_not_null(d, s); + %} + ins_pipe(ialu_reg); +%} + +// n.b. 
AArch64 implementations of encode_klass_not_null and +// decode_klass_not_null do not modify the flags register so, unlike +// Intel, we don't kill CR as a side effect here + +instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ + match(Set dst (EncodePKlass src)); + + ins_cost(INSN_COST * 3); + format %{ "encode_klass_not_null $dst,$src" %} + + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + __ encode_klass_not_null(dst_reg, src_reg); + %} + + ins_pipe(ialu_reg); +%} + +instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ + match(Set dst (DecodeNKlass src)); + + ins_cost(INSN_COST * 3); + format %{ "decode_klass_not_null $dst,$src" %} + + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + if (dst_reg != src_reg) { + __ decode_klass_not_null(dst_reg, src_reg); + } else { + __ decode_klass_not_null(dst_reg); + } + %} + + ins_pipe(ialu_reg); +%} + +instruct checkCastPP(iRegPNoSp dst) +%{ + match(Set dst (CheckCastPP dst)); + + size(0); + format %{ "# checkcastPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(pipe_class_empty); +%} + +instruct castPP(iRegPNoSp dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(pipe_class_empty); +%} + +instruct castII(iRegI dst) +%{ + match(Set dst (CastII dst)); + + size(0); + format %{ "# castII of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(pipe_class_empty); +%} + +// ============================================================================ +// Atomic operation instructions +// +// Intel and SPARC both implement Ideal Node LoadPLocked and +// Store{PIL}Conditional instructions using a normal load for the +// LoadPLocked and a CAS for the Store{PIL}Conditional. +// +// The ideal code appears only to use LoadPLocked/StorePLocked as a +// pair to lock object allocations from Eden space when not using +// TLABs. +// +// There does not appear to be a Load{IL}Locked Ideal Node and the +// Ideal code appears to use Store{IL}Conditional as an alias for CAS +// and to use StoreIConditional only for 32-bit and StoreLConditional +// only for 64-bit. +// +// We implement LoadPLocked and StorePLocked instructions using, +// respectively the AArch64 hw load-exclusive and store-conditional +// instructions. Whereas we must implement each of +// Store{IL}Conditional using a CAS which employs a pair of +// instructions comprising a load-exclusive followed by a +// store-conditional. + + +// Locked-load (linked load) of the current heap-top +// used when updating the eden heap top +// implemented using ldaxr on AArch64 + +instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ + match(Set dst (LoadPLocked mem)); + + ins_cost(VOLATILE_REF_COST); + + format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %} + + ins_encode(aarch64_enc_ldaxr(dst, mem)); + + ins_pipe(pipe_serial); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. +// Sets flag (EQ) on success. +// implemented using stlxr on AArch64. + +instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + ins_cost(VOLATILE_REF_COST); + + // TODO + // do we need to do a store-conditional release or can we just use a + // plain store-conditional? 
+ + format %{ + "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release" + "cmpw rscratch1, zr\t# EQ on successful write" + %} + + ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr)); + + ins_pipe(pipe_serial); +%} + + +// storeLConditional is used by PhaseMacroExpand::expand_lock_node +// when attempting to rebias a lock towards the current thread. We +// must use the acquire form of cmpxchg in order to guarantee acquire +// semantics in this case. +instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + + ins_cost(VOLATILE_REF_COST); + + format %{ + "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" + "cmpw rscratch1, zr\t# EQ on successful write" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +// storeIConditional also has acquire semantics, for no better reason +// than matching storeLConditional. At the time of writing this +// comment storeIConditional was not used anywhere by AArch64. +instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) +%{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + + ins_cost(VOLATILE_REF_COST); + + format %{ + "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" + "cmpw rscratch1, zr\t# EQ on successful write" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval)); + + ins_pipe(pipe_slow); +%} + +// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher +// can't match them + +// standard CompareAndSwapX when we are using barriers +// these have higher priority than the rules selected by a predicate + +instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ + + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ + + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 
1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ + + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + + +// alternative CompareAndSwapX when we are eliding barriers + +instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 
1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + + +instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{ + match(Set prev (GetAndSetI mem newv)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgw $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{ + match(Set prev (GetAndSetL mem newv)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ + match(Set prev (GetAndSetN mem newv)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgw $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{ + match(Set prev (GetAndSetP mem newv)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set prev (GetAndSetI mem newv)); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set prev (GetAndSetL mem newv)); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set prev (GetAndSetN mem newv)); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set prev (GetAndSetP mem newv)); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + + +instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{ + match(Set newval (GetAndAddL mem incr)); + ins_cost(2 * VOLATILE_REF_COST + 1); + format %{ "get_and_addL $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem incr)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "get_and_addL [$mem], $incr" %} + ins_encode %{ + __ 
atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{ + match(Set newval (GetAndAddL mem incr)); + ins_cost(2 * VOLATILE_REF_COST + 1); + format %{ "get_and_addL $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem incr)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "get_and_addL [$mem], $incr" %} + ins_encode %{ + __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ + match(Set newval (GetAndAddI mem incr)); + ins_cost(2 * VOLATILE_REF_COST + 1); + format %{ "get_and_addI $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem incr)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "get_and_addI [$mem], $incr" %} + ins_encode %{ + __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{ + match(Set newval (GetAndAddI mem incr)); + ins_cost(2 * VOLATILE_REF_COST + 1); + format %{ "get_and_addI $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem incr)); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "get_and_addI [$mem], $incr" %} + ins_encode %{ + __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set newval (GetAndAddL mem incr)); + ins_cost(VOLATILE_REF_COST + 1); + format %{ "get_and_addL_acq $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + match(Set dummy (GetAndAddL mem incr)); + ins_cost(VOLATILE_REF_COST); + format %{ "get_and_addL_acq [$mem], $incr" %} + ins_encode %{ + __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set newval (GetAndAddL mem incr)); + ins_cost(VOLATILE_REF_COST + 1); + format %{ "get_and_addL_acq $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{ + 
predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + match(Set dummy (GetAndAddL mem incr)); + ins_cost(VOLATILE_REF_COST); + format %{ "get_and_addL_acq [$mem], $incr" %} + ins_encode %{ + __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set newval (GetAndAddI mem incr)); + ins_cost(VOLATILE_REF_COST + 1); + format %{ "get_and_addI_acq $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + match(Set dummy (GetAndAddI mem incr)); + ins_cost(VOLATILE_REF_COST); + format %{ "get_and_addI_acq [$mem], $incr" %} + ins_encode %{ + __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{ + predicate(needs_acquiring_load_exclusive(n)); + match(Set newval (GetAndAddI mem incr)); + ins_cost(VOLATILE_REF_COST + 1); + format %{ "get_and_addI_acq $newval, [$mem], $incr" %} + ins_encode %{ + __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{ + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + match(Set dummy (GetAndAddI mem incr)); + ins_cost(VOLATILE_REF_COST); + format %{ "get_and_addI_acq [$mem], $incr" %} + ins_encode %{ + __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + ins_pipe(pipe_serial); +%} + +// ============================================================================ +// Conditional Move Instructions + +// n.b. we have identical rules for both a signed compare op (cmpOp) +// and an unsigned compare op (cmpOpU). it would be nice if we could +// define an op class which merged both inputs and use it to type the +// argument to a single rule. unfortunately this fails because the +// opclass does not live up to the COND_INTER interface of its +// component operands. When the generic code tries to negate the +// operand it ends up running the generic MachOper::negate method +// which throws a ShouldNotHappen. So, we have to provide two flavours +// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
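// A minimal sketch, assuming a signed int compare feeding a CMoveI, of the
// code shape the rules below are expected to select (register numbers are
// illustrative only):
//
//   Java:    int r = (a < b) ? x : y;
//   AArch64: cmpw  w0, w1            // compare a, b and set the flags
//            cselw w2, w3, w4, LT    // r = (a < b) ? x : y, without a branch
//
// the unsigned (cmpOpU) flavours select the same csel/cselw shape, only with
// an unsigned condition code.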
+ +instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +// special cases where one arg is zero + +// n.b. this is selected in preference to the rule above because it +// avoids loading constant 0 into a source register + +// TODO +// we ought only to be able to cull one of these variants as the ideal +// transforms ought always to order the zero consistently (to left/right?) + +instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src, zr $cmp\t# signed, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, zr, $src $cmp\t# signed, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +// special case for creating a boolean 0 or 1 + +// n.b. 
this is selected in preference to the rule above because it +// avoids loading constants 0 and 1 into a source register + +instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary one zero))); + + ins_cost(INSN_COST * 2); + format %{ "csincw $dst, zr, zr $cmp\t# signed, int" %} + + ins_encode %{ + // equivalently + // cset(as_Register($dst$$reg), + // negate_condition((Assembler::Condition)$cmp$$cmpcode)); + __ csincw(as_Register($dst$$reg), + zr, + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_none); +%} + +instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary one zero))); + + ins_cost(INSN_COST * 2); + format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int" %} + + ins_encode %{ + // equivalently + // cset(as_Register($dst$$reg), + // negate_condition((Assembler::Condition)$cmp$$cmpcode)); + __ csincw(as_Register($dst$$reg), + zr, + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_none); +%} + +instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +// special cases where one arg is zero + +instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, zr, $src $cmp\t# signed, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, zr, $src $cmp\t# unsigned, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src, zr $cmp\t# signed, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src, zr $cmp\t# unsigned, long" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + 
as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +// special cases where one arg is zero + +instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, zr, $src $cmp\t# signed, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src, zr $cmp\t# signed, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr" %} + + ins_encode %{ + __ csel(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg_reg); +%} + +// special cases where one arg is zero + +instruct 
cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary src zero))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + zr, + as_Register($src$$reg), + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary zero src))); + + ins_cost(INSN_COST * 2); + format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr" %} + + ins_encode %{ + __ cselw(as_Register($dst$$reg), + as_Register($src$$reg), + zr, + (Assembler::Condition)$cmp$$cmpcode); + %} + + ins_pipe(icond_reg); +%} + +instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1, vRegF src2) +%{ + match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 3); + + format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %} + ins_encode %{ + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ fcsels(as_FloatRegister($dst$$reg), + as_FloatRegister($src2$$reg), + as_FloatRegister($src1$$reg), + cond); + %} + + ins_pipe(fp_cond_reg_reg_s); +%} + +instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1, vRegF src2) +%{ + match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 3); + + format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %} + ins_encode %{ + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ fcsels(as_FloatRegister($dst$$reg), + as_FloatRegister($src2$$reg), + as_FloatRegister($src1$$reg), + cond); + %} + + ins_pipe(fp_cond_reg_reg_s); +%} + +instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1, vRegD src2) +%{ + match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 3); + + format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %} + ins_encode %{ + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ fcseld(as_FloatRegister($dst$$reg), + as_FloatRegister($src2$$reg), + as_FloatRegister($src1$$reg), + cond); + %} + + ins_pipe(fp_cond_reg_reg_d); +%} + +instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1, vRegD src2) +%{ + match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2))); + + ins_cost(INSN_COST * 3); + + format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %} + ins_encode %{ + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ fcseld(as_FloatRegister($dst$$reg), + as_FloatRegister($src2$$reg), + 
as_FloatRegister($src1$$reg), + cond); + %} + + ins_pipe(fp_cond_reg_reg_d); +%} + +// ============================================================================ +// Arithmetic Instructions +// + +// Integer Addition + +// TODO +// these currently employ operations which do not set CR and hence are +// not flagged as killing CR but we would like to isolate the cases +// where we want to set flags from those where we don't. need to work +// out how to do that. + +instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (AddI src1 src2)); + + ins_cost(INSN_COST); + format %{ "addw $dst, $src1, $src2" %} + + ins_encode %{ + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{ + match(Set dst (AddI src1 src2)); + + ins_cost(INSN_COST); + format %{ "addw $dst, $src1, $src2" %} + + // use opcode to indicate that this is an add not a sub + opcode(0x0); + + ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); + + ins_pipe(ialu_reg_imm); +%} + +instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{ + match(Set dst (AddI (ConvL2I src1) src2)); + + ins_cost(INSN_COST); + format %{ "addw $dst, $src1, $src2" %} + + // use opcode to indicate that this is an add not a sub + opcode(0x0); + + ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2)); + + ins_pipe(ialu_reg_imm); +%} + +// Pointer Addition +instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ + match(Set dst (AddP src1 src2)); + + ins_cost(INSN_COST); + format %{ "add $dst, $src1, $src2\t# ptr" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ + match(Set dst (AddP src1 (ConvI2L src2))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtw); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{ + match(Set dst (AddP src1 (LShiftL src2 scale))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %} + + ins_encode %{ + __ lea(as_Register($dst$$reg), + Address(as_Register($src1$$reg), as_Register($src2$$reg), + Address::lsl($scale$$constant))); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{ + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %} + + ins_encode %{ + __ lea(as_Register($dst$$reg), + Address(as_Register($src1$$reg), as_Register($src2$$reg), + Address::sxtw($scale$$constant))); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{ + match(Set dst (LShiftL (ConvI2L src) scale)); + + ins_cost(INSN_COST); + format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %} + + ins_encode %{ + __ sbfiz(as_Register($dst$$reg), + as_Register($src$$reg), + $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63)); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Pointer Immediate Addition +// n.b. 
this needs to be more expensive than using an indirect memory
+// operand
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
+  match(Set dst (AddP src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Immediate Addition. No constant pool entries required.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (AddL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is an add not a sub
+  opcode(0x0);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Subtraction
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ subw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
+  match(Set dst (SubI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "subw $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Subtraction
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ sub(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
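As an aside on the add/subtract rules above and the immediate rules that follow: operands with a small constant are expected to match the immediate forms (encoded through aarch64_enc_addsub_imm / aarch64_enc_addsubw_imm), while variable operands fall back to the reg-reg forms. The sketch below is illustrative only and is not part of the patch; the class and method names are invented, and the instruction comments are the expected selections, not a guarantee.

// Hypothetical illustration: which add/sub rules these Java shapes
// are expected to match when C2 compiles them on AArch64.
public class AddSubShapes {
    static long addImm(long x)         { return x + 16; } // AddL reg, immLAddSub -> add  Xd, Xn, #16
    static long addReg(long x, long y) { return x + y;  } // AddL reg, reg        -> add  Xd, Xn, Xm
    static int  subImm(int x)          { return x - 7;  } // SubI reg, immIAddSub -> subw Wd, Wn, #7
    static int  subReg(int x, int y)   { return x - y;  } // SubI reg, reg        -> subw Wd, Wn, Wm
}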
+// Long Immediate Subtraction. No constant pool entries required.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
+  match(Set dst (SubL src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "sub $dst, $src1, $src2" %}
+
+  // use opcode to indicate that this is a sub not an add
+  opcode(0x1);
+
+  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Negation (special case for sub)
+
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
+  match(Set dst (SubI zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "negw $dst, $src\t# int" %}
+
+  ins_encode %{
+    __ negw(as_Register($dst$$reg),
+            as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long Negation
+
+instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
+  match(Set dst (SubL zero src));
+
+  ins_cost(INSN_COST);
+  format %{ "neg $dst, $src\t# long" %}
+
+  ins_encode %{
+    __ neg(as_Register($dst$$reg),
+           as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Multiply
+
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulI src1 src2));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "mulw $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mulw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "smull $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ smull(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+// Long Multiply
+
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "mul $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ mul(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
+%{
+  match(Set dst (MulHiL src1 src2));
+
+  ins_cost(INSN_COST * 7);
+  format %{ "smulh $dst, $src1, $src2\t# mulhi" %}
+
+  ins_encode %{
+    __ smulh(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+// Combined Integer Multiply & Add/Sub
+
+instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (AddI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "madd $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ maddw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
+  match(Set dst (SubI src3 (MulI src1 src2)));
+
+  ins_cost(INSN_COST * 3);
+  format %{ "msub $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msubw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg),
+             as_Register($src3$$reg));
+  %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
+// Combined Long Multiply & Add/Sub
+
+instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (AddL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "madd $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ madd(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
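The combined multiply-add/subtract rules above (and the long msub rule that follows) fold an Add/Sub of a Mul into a single madd/msub. A hedged Java sketch of the source shapes they are expected to match; the class and method names are invented for the example and the mappings shown in comments are expectations, not part of the patch.

// Hypothetical illustration: multiply-accumulate shapes expected to
// match the maddI/msubI/maddL rules (maddw/msubw/madd on AArch64).
public class MulAccShapes {
    static int  fmaInt (int a, int b, int c)    { return c + a * b; } // AddI c (MulI a b) -> maddw
    static int  fmsInt (int a, int b, int c)    { return c - a * b; } // SubI c (MulI a b) -> msubw
    static long fmaLong(long a, long b, long c) { return c + a * b; } // AddL c (MulL a b) -> madd
}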
+instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
+  match(Set dst (SubL src3 (MulL src1 src2)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "msub $dst, $src1, $src2, $src3" %}
+
+  ins_encode %{
+    __ msub(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg),
+            as_Register($src3$$reg));
+  %}
+
+  ins_pipe(lmac_reg_reg);
+%}
+
+// Integer Divide
+
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (DivI src1 src2));
+
+  ins_cost(INSN_COST * 19);
+  format %{ "sdivw $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_divw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
+  match(Set dst (URShiftI (RShiftI src1 div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsrw $dst, $src1, $div1" %}
+  ins_encode %{
+    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
+  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "addw $dst, $src, LSR $div1" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg),
+            as_Register($src$$reg),
+            as_Register($src$$reg),
+            Assembler::LSR, 31);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Long Divide
+
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (DivL src1 src2));
+
+  ins_cost(INSN_COST * 35);
+  format %{ "sdiv $dst, $src1, $src2" %}
+
+  ins_encode(aarch64_enc_div(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
+  match(Set dst (URShiftL (RShiftL src1 div1) div2));
+  ins_cost(INSN_COST);
+  format %{ "lsr $dst, $src1, $div1" %}
+  ins_encode %{
+    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
+  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
+  ins_cost(INSN_COST);
+  format %{ "add $dst, $src, LSR $div1" %}
+
+  ins_encode %{
+    __ add(as_Register($dst$$reg),
+           as_Register($src$$reg),
+           as_Register($src$$reg),
+           Assembler::LSR, 63);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Remainder
+
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (ModI src1 src2));
+
+  ins_cost(INSN_COST * 22);
+  format %{ "sdivw rscratch1, $src1, $src2\n\t"
+            "msubw $dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_modw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+// Long Remainder
+
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (ModL src1 src2));
+
+  ins_cost(INSN_COST * 38);
+  format %{ "sdiv rscratch1, $src1, $src2\n\t"
+            "msub $dst, rscratch1, $src2, $src1" %}
+
+  ins_encode(aarch64_enc_mod(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+// Integer Shifts
+
+// Shift Left Register
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST * 2);
+  format %{ "lslvw $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ lslvw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (LShiftI src1 src2));
+
+  ins_cost(INSN_COST);
+  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}
+
+  ins_encode %{
+    __
lslw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Logical Register +instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (URShiftI src1 src2)); + + ins_cost(INSN_COST * 2); + format %{ "lsrvw $dst, $src1, $src2" %} + + ins_encode %{ + __ lsrvw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Logical Immediate +instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ + match(Set dst (URShiftI src1 src2)); + + ins_cost(INSN_COST); + format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %} + + ins_encode %{ + __ lsrw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Arithmetic Register +instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (RShiftI src1 src2)); + + ins_cost(INSN_COST * 2); + format %{ "asrvw $dst, $src1, $src2" %} + + ins_encode %{ + __ asrvw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Arithmetic Immediate +instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ + match(Set dst (RShiftI src1 src2)); + + ins_cost(INSN_COST); + format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %} + + ins_encode %{ + __ asrw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Combined Int Mask and Right Shift (using UBFM) +// TODO + +// Long Shifts + +// Shift Left Register +instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (LShiftL src1 src2)); + + ins_cost(INSN_COST * 2); + format %{ "lslv $dst, $src1, $src2" %} + + ins_encode %{ + __ lslv(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Left Immediate +instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ + match(Set dst (LShiftL src1 src2)); + + ins_cost(INSN_COST); + format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %} + + ins_encode %{ + __ lsl(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Logical Register +instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (URShiftL src1 src2)); + + ins_cost(INSN_COST * 2); + format %{ "lsrv $dst, $src1, $src2" %} + + ins_encode %{ + __ lsrv(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Logical Immediate +instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ + match(Set dst (URShiftL src1 src2)); + + ins_cost(INSN_COST); + format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %} + + ins_encode %{ + __ lsr(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// A special-case pattern for card table stores. 
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ + match(Set dst (URShiftL (CastP2X src1) src2)); + + ins_cost(INSN_COST); + format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %} + + ins_encode %{ + __ lsr(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Right Arithmetic Register +instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (RShiftL src1 src2)); + + ins_cost(INSN_COST * 2); + format %{ "asrv $dst, $src1, $src2" %} + + ins_encode %{ + __ asrv(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg_vshift); +%} + +// Shift Right Arithmetic Immediate +instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ + match(Set dst (RShiftL src1 src2)); + + ins_cost(INSN_COST); + format %{ "asr $dst, $src1, ($src2 & 0x3f)" %} + + ins_encode %{ + __ asr(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + +// BEGIN This section of the file is automatically generated. Do not edit -------------- + +instruct regL_not_reg(iRegLNoSp dst, + iRegL src1, immL_M1 m1, + rFlagsReg cr) %{ + match(Set dst (XorL src1 m1)); + ins_cost(INSN_COST); + format %{ "eon $dst, $src1, zr" %} + + ins_encode %{ + __ eon(as_Register($dst$$reg), + as_Register($src1$$reg), + zr, + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg); +%} +instruct regI_not_reg(iRegINoSp dst, + iRegIorL2I src1, immI_M1 m1, + rFlagsReg cr) %{ + match(Set dst (XorI src1 m1)); + ins_cost(INSN_COST); + format %{ "eonw $dst, $src1, zr" %} + + ins_encode %{ + __ eonw(as_Register($dst$$reg), + as_Register($src1$$reg), + zr, + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg); +%} + +instruct AndI_reg_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, + rFlagsReg cr) %{ + match(Set dst (AndI src1 (XorI src2 m1))); + ins_cost(INSN_COST); + format %{ "bicw $dst, $src1, $src2" %} + + ins_encode %{ + __ bicw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct AndL_reg_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, immL_M1 m1, + rFlagsReg cr) %{ + match(Set dst (AndL src1 (XorL src2 m1))); + ins_cost(INSN_COST); + format %{ "bic $dst, $src1, $src2" %} + + ins_encode %{ + __ bic(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct OrI_reg_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, + rFlagsReg cr) %{ + match(Set dst (OrI src1 (XorI src2 m1))); + ins_cost(INSN_COST); + format %{ "ornw $dst, $src1, $src2" %} + + ins_encode %{ + __ ornw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct OrL_reg_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, immL_M1 m1, + rFlagsReg cr) %{ + match(Set dst (OrL src1 (XorL src2 m1))); + ins_cost(INSN_COST); + format %{ "orn $dst, $src1, $src2" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct XorI_reg_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, + rFlagsReg cr) %{ + match(Set dst (XorI m1 (XorI src2 src1))); + ins_cost(INSN_COST); + format %{ "eonw $dst, $src1, $src2" %} + + ins_encode %{ + 
__ eonw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct XorL_reg_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, immL_M1 m1, + rFlagsReg cr) %{ + match(Set dst (XorL m1 (XorL src2 src1))); + ins_cost(INSN_COST); + format %{ "eon $dst, $src1, $src2" %} + + ins_encode %{ + __ eon(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct AndI_reg_URShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bicw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ bicw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bic $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ bic(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndI_reg_RShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bicw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ bicw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bic $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ bic(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndI_reg_LShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bicw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ bicw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "bic $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ bic(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_URShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eonw $dst, $src1, $src2, LSR 
$src3" %} + + ins_encode %{ + __ eonw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eon $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ eon(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_RShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eonw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ eonw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_RShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eon $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ eon(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_LShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eonw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ eonw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_LShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1))); + ins_cost(1.9 * INSN_COST); + format %{ "eon $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ eon(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_URShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "ornw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ ornw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_URShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "orn $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_RShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) 
%{ + match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "ornw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ ornw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_RShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "orn $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_LShift_not_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, immI_M1 src4, rFlagsReg cr) %{ + match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "ornw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ ornw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_LShift_not_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, immL_M1 src4, rFlagsReg cr) %{ + match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4))); + ins_cost(1.9 * INSN_COST); + format %{ "orn $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndI_reg_URShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AndI src1 (URShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ andw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_URShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AndL src1 (URShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andr $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndI_reg_RShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AndI src1 (RShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ andw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_RShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AndL src1 (RShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andr $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndI_reg_LShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, 
rFlagsReg cr) %{ + match(Set dst (AndI src1 (LShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ andw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AndL_reg_LShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AndL src1 (LShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "andr $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_URShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorI src1 (URShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eorw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ eorw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_URShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorL src1 (URShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eor $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ eor(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_RShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorI src1 (RShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eorw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ eorw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_RShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorL src1 (RShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eor $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ eor(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorI_reg_LShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorI src1 (LShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eorw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ eorw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct XorL_reg_LShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (XorL src1 (LShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "eor $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ eor(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_URShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrI src1 (URShiftI src2 src3))); + + 
ins_cost(1.9 * INSN_COST); + format %{ "orrw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ orrw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_URShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrL src1 (URShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "orr $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_RShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrI src1 (RShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "orrw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ orrw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_RShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrL src1 (RShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "orr $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrI_reg_LShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrI src1 (LShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "orrw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ orrw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct OrL_reg_LShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (OrL src1 (LShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "orr $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddI_reg_URShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddI src1 (URShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "addw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddL_reg_URShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddL src1 (URShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddI_reg_RShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddI src1 (RShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "addw $dst, $src1, $src2, ASR $src3" %} + + 
ins_encode %{ + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddL_reg_RShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddL src1 (RShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddI_reg_LShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddI src1 (LShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "addw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct AddL_reg_LShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (AddL src1 (LShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "add $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubI_reg_URShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubI src1 (URShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "subw $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubL_reg_URShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubL src1 (URShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "sub $dst, $src1, $src2, LSR $src3" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubI_reg_RShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubI src1 (RShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "subw $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubL_reg_RShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubL src1 (RShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "sub $dst, $src1, $src2, ASR $src3" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::ASR, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubI_reg_LShift_reg(iRegINoSp dst, + iRegIorL2I src1, iRegIorL2I src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubI src1 (LShiftI src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "subw $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + 
as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + +instruct SubL_reg_LShift_reg(iRegLNoSp dst, + iRegL src1, iRegL src2, + immI src3, rFlagsReg cr) %{ + match(Set dst (SubL src1 (LShiftL src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "sub $dst, $src1, $src2, LSL $src3" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, + $src3$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_reg_shift); +%} + + + +// Shift Left followed by Shift Right. +// This idiom is used by the compiler for the i2b bytecode etc. +instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count) +%{ + match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count)); + // Make sure we are not going to exceed what sbfm can do. + predicate((unsigned int)n->in(2)->get_int() <= 63 + && (unsigned int)n->in(1)->in(2)->get_int() <= 63); + + ins_cost(INSN_COST * 2); + format %{ "sbfm $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %} + ins_encode %{ + int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant; + int s = 63 - lshift; + int r = (rshift - lshift) & 63; + __ sbfm(as_Register($dst$$reg), + as_Register($src$$reg), + r, s); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Left followed by Shift Right. +// This idiom is used by the compiler for the i2b bytecode etc. +instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count) +%{ + match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count)); + // Make sure we are not going to exceed what sbfmw can do. + predicate((unsigned int)n->in(2)->get_int() <= 31 + && (unsigned int)n->in(1)->in(2)->get_int() <= 31); + + ins_cost(INSN_COST * 2); + format %{ "sbfmw $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %} + ins_encode %{ + int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant; + int s = 31 - lshift; + int r = (rshift - lshift) & 31; + __ sbfmw(as_Register($dst$$reg), + as_Register($src$$reg), + r, s); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Left followed by Shift Right. +// This idiom is used by the compiler for the i2b bytecode etc. +instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count) +%{ + match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count)); + // Make sure we are not going to exceed what ubfm can do. + predicate((unsigned int)n->in(2)->get_int() <= 63 + && (unsigned int)n->in(1)->in(2)->get_int() <= 63); + + ins_cost(INSN_COST * 2); + format %{ "ubfm $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %} + ins_encode %{ + int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant; + int s = 63 - lshift; + int r = (rshift - lshift) & 63; + __ ubfm(as_Register($dst$$reg), + as_Register($src$$reg), + r, s); + %} + + ins_pipe(ialu_reg_shift); +%} + +// Shift Left followed by Shift Right. +// This idiom is used by the compiler for the i2b bytecode etc. +instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count) +%{ + match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count)); + // Make sure we are not going to exceed what ubfmw can do. 
+ predicate((unsigned int)n->in(2)->get_int() <= 31 + && (unsigned int)n->in(1)->in(2)->get_int() <= 31); + + ins_cost(INSN_COST * 2); + format %{ "ubfmw $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %} + ins_encode %{ + int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant; + int s = 31 - lshift; + int r = (rshift - lshift) & 31; + __ ubfmw(as_Register($dst$$reg), + as_Register($src$$reg), + r, s); + %} + + ins_pipe(ialu_reg_shift); +%} +// Bitfield extract with shift & mask + +instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask) +%{ + match(Set dst (AndI (URShiftI src rshift) mask)); + // Make sure we are not going to exceed what ubfxw can do. + predicate((exact_log2(n->in(2)->get_int() + 1) + (n->in(1)->in(2)->get_int() & 31)) <= (31 + 1)); + + ins_cost(INSN_COST); + format %{ "ubfxw $dst, $src, $mask" %} + ins_encode %{ + int rshift = $rshift$$constant & 31; + long mask = $mask$$constant; + int width = exact_log2(mask+1); + __ ubfxw(as_Register($dst$$reg), + as_Register($src$$reg), rshift, width); + %} + ins_pipe(ialu_reg_shift); +%} +instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask) +%{ + match(Set dst (AndL (URShiftL src rshift) mask)); + // Make sure we are not going to exceed what ubfx can do. + predicate((exact_log2_long(n->in(2)->get_long() + 1) + (n->in(1)->in(2)->get_int() & 63)) <= (63 + 1)); + + ins_cost(INSN_COST); + format %{ "ubfx $dst, $src, $mask" %} + ins_encode %{ + int rshift = $rshift$$constant & 63; + long mask = $mask$$constant; + int width = exact_log2_long(mask+1); + __ ubfx(as_Register($dst$$reg), + as_Register($src$$reg), rshift, width); + %} + ins_pipe(ialu_reg_shift); +%} + +// We can use ubfx when extending an And with a mask when we know mask +// is positive. We know that because immI_bitmask guarantees it. +instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask) +%{ + match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask))); + // Make sure we are not going to exceed what ubfxw can do. 
+ predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1)); + + ins_cost(INSN_COST * 2); + format %{ "ubfx $dst, $src, $mask" %} + ins_encode %{ + int rshift = $rshift$$constant & 31; + long mask = $mask$$constant; + int width = exact_log2(mask+1); + __ ubfx(as_Register($dst$$reg), + as_Register($src$$reg), rshift, width); + %} + ins_pipe(ialu_reg_shift); +%} + +// Rotations + +instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr) +%{ + match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift))); + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63)); + + ins_cost(INSN_COST); + format %{ "extr $dst, $src1, $src2, #$rshift" %} + + ins_encode %{ + __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), + $rshift$$constant & 63); + %} + ins_pipe(ialu_reg_reg_extr); +%} + +instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr) +%{ + match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift))); + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31)); + + ins_cost(INSN_COST); + format %{ "extr $dst, $src1, $src2, #$rshift" %} + + ins_encode %{ + __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), + $rshift$$constant & 31); + %} + ins_pipe(ialu_reg_reg_extr); +%} + +instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr) +%{ + match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift))); + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63)); + + ins_cost(INSN_COST); + format %{ "extr $dst, $src1, $src2, #$rshift" %} + + ins_encode %{ + __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), + $rshift$$constant & 63); + %} + ins_pipe(ialu_reg_reg_extr); +%} + +instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr) +%{ + match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift))); + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31)); + + ins_cost(INSN_COST); + format %{ "extr $dst, $src1, $src2, #$rshift" %} + + ins_encode %{ + __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), + $rshift$$constant & 31); + %} + ins_pipe(ialu_reg_reg_extr); +%} + + +// rol expander + +instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "rol $dst, $src, $shift" %} + ins_cost(INSN_COST * 3); + ins_encode %{ + __ subw(rscratch1, zr, as_Register($shift$$reg)); + __ rorv(as_Register($dst$$reg), as_Register($src$$reg), + rscratch1); + %} + ins_pipe(ialu_reg_reg_vshift); +%} + +// rol expander + +instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "rol $dst, $src, $shift" %} + ins_cost(INSN_COST * 3); + ins_encode %{ + __ subw(rscratch1, zr, as_Register($shift$$reg)); + __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), + rscratch1); + %} + ins_pipe(ialu_reg_reg_vshift); +%} + +instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) +%{ + match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift)))); + + expand %{ + rolL_rReg(dst, src, shift, cr); + %} +%} + +instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, 
rFlagsReg cr) +%{ + match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift)))); + + expand %{ + rolL_rReg(dst, src, shift, cr); + %} +%} + +instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr) +%{ + match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift)))); + + expand %{ + rolI_rReg(dst, src, shift, cr); + %} +%} + +instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr) +%{ + match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift)))); + + expand %{ + rolI_rReg(dst, src, shift, cr); + %} +%} + +// ror expander + +instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "ror $dst, $src, $shift" %} + ins_cost(INSN_COST); + ins_encode %{ + __ rorv(as_Register($dst$$reg), as_Register($src$$reg), + as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg_vshift); +%} + +// ror expander + +instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "ror $dst, $src, $shift" %} + ins_cost(INSN_COST); + ins_encode %{ + __ rorvw(as_Register($dst$$reg), as_Register($src$$reg), + as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg_vshift); +%} + +instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr) +%{ + match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift)))); + + expand %{ + rorL_rReg(dst, src, shift, cr); + %} +%} + +instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr) +%{ + match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift)))); + + expand %{ + rorL_rReg(dst, src, shift, cr); + %} +%} + +instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr) +%{ + match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift)))); + + expand %{ + rorI_rReg(dst, src, shift, cr); + %} +%} + +instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr) +%{ + match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift)))); + + expand %{ + rorI_rReg(dst, src, shift, cr); + %} +%} + +// Add/subtract (extended) + +instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (ConvI2L src2))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxtw $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtw); + %} + ins_pipe(ialu_reg_reg); +%}; + +instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr) +%{ + match(Set dst (SubL src1 (ConvI2L src2))); + ins_cost(INSN_COST); + format %{ "sub $dst, $src1, sxtw $src2" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtw); + %} + ins_pipe(ialu_reg_reg); +%}; + + +instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr) +%{ + match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxth $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) +%{ + match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) 
rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxtb $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr) +%{ + match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, uxtb $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxth $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxtw $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtw); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, sxtb $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::sxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, uxtb $src2" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + + +instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) +%{ + match(Set dst (AddI src1 (AndI src2 mask))); + ins_cost(INSN_COST); + format %{ "addw $dst, $src1, $src2, uxtb" %} + + ins_encode %{ + __ addw(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) +%{ + match(Set dst (AddI src1 (AndI src2 mask))); + ins_cost(INSN_COST); + format %{ "addw $dst, $src1, $src2, uxth" %} + + ins_encode %{ + __ addw(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, $src2, uxtb" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + 
format %{ "add $dst, $src1, $src2, uxth" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) +%{ + match(Set dst (AddL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + format %{ "add $dst, $src1, $src2, uxtw" %} + + ins_encode %{ + __ add(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtw); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr) +%{ + match(Set dst (SubI src1 (AndI src2 mask))); + ins_cost(INSN_COST); + format %{ "subw $dst, $src1, $src2, uxtb" %} + + ins_encode %{ + __ subw(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr) +%{ + match(Set dst (SubI src1 (AndI src2 mask))); + ins_cost(INSN_COST); + format %{ "subw $dst, $src1, $src2, uxth" %} + + ins_encode %{ + __ subw(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr) +%{ + match(Set dst (SubL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + format %{ "sub $dst, $src1, $src2, uxtb" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtb); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr) +%{ + match(Set dst (SubL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + format %{ "sub $dst, $src1, $src2, uxth" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxth); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr) +%{ + match(Set dst (SubL src1 (AndL src2 mask))); + ins_cost(INSN_COST); + format %{ "sub $dst, $src1, $src2, uxtw" %} + + ins_encode %{ + __ sub(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::uxtw); + %} + ins_pipe(ialu_reg_reg); +%} + +// END This section of the file is automatically generated. 
Do not edit -------------- + +// ============================================================================ +// Floating Point Arithmetic Instructions + +instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (AddF src1 src2)); + + ins_cost(INSN_COST * 5); + format %{ "fadds $dst, $src1, $src2" %} + + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_s); +%} + +instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ + match(Set dst (AddD src1 src2)); + + ins_cost(INSN_COST * 5); + format %{ "faddd $dst, $src1, $src2" %} + + ins_encode %{ + __ faddd(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_d); +%} + +instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (SubF src1 src2)); + + ins_cost(INSN_COST * 5); + format %{ "fsubs $dst, $src1, $src2" %} + + ins_encode %{ + __ fsubs(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_s); +%} + +instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ + match(Set dst (SubD src1 src2)); + + ins_cost(INSN_COST * 5); + format %{ "fsubd $dst, $src1, $src2" %} + + ins_encode %{ + __ fsubd(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_d); +%} + +instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (MulF src1 src2)); + + ins_cost(INSN_COST * 6); + format %{ "fmuls $dst, $src1, $src2" %} + + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_s); +%} + +instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ + match(Set dst (MulD src1 src2)); + + ins_cost(INSN_COST * 6); + format %{ "fmuld $dst, $src1, $src2" %} + + ins_encode %{ + __ fmuld(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_dop_reg_reg_d); +%} + +// We cannot use these fused mul w add/sub ops because they don't +// produce the same result as the equivalent separated ops +// (essentially they don't round the intermediate result). that's a +// shame. 
Leaving them here in case we can identify cases where it is +// legitimate to use them + + +// instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ +// match(Set dst (AddF (MulF src1 src2) src3)); + +// format %{ "fmadds $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fmadds(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ +// match(Set dst (AddD (MulD src1 src2) src3)); + +// format %{ "fmaddd $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fmaddd(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ +// match(Set dst (AddF (MulF (NegF src1) src2) src3)); +// match(Set dst (AddF (NegF (MulF src1 src2)) src3)); + +// format %{ "fmsubs $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fmsubs(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ +// match(Set dst (AddD (MulD (NegD src1) src2) src3)); +// match(Set dst (AddD (NegD (MulD src1 src2)) src3)); + +// format %{ "fmsubd $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fmsubd(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ +// match(Set dst (SubF (MulF (NegF src1) src2) src3)); +// match(Set dst (SubF (NegF (MulF src1 src2)) src3)); + +// format %{ "fnmadds $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fnmadds(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ +// match(Set dst (SubD (MulD (NegD src1) src2) src3)); +// match(Set dst (SubD (NegD (MulD src1 src2)) src3)); + +// format %{ "fnmaddd $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fnmaddd(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{ +// match(Set dst (SubF (MulF src1 src2) src3)); + +// format %{ "fnmsubs $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// __ fnmsubs(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + +// instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{ +// match(Set dst (SubD (MulD src1 src2) src3)); + +// format %{ "fnmsubd $dst, $src1, $src2, $src3" %} + +// ins_encode %{ +// // n.b. 
insn name should be fnmsubd +// __ fnmsub(as_FloatRegister($dst$$reg), +// as_FloatRegister($src1$$reg), +// as_FloatRegister($src2$$reg), +// as_FloatRegister($src3$$reg)); +// %} + +// ins_pipe(pipe_class_default); +// %} + + +instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(INSN_COST * 18); + format %{ "fdivs $dst, $src1, $src2" %} + + ins_encode %{ + __ fdivs(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_div_s); +%} + +instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(INSN_COST * 32); + format %{ "fdivd $dst, $src1, $src2" %} + + ins_encode %{ + __ fdivd(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_div_d); +%} + +instruct negF_reg_reg(vRegF dst, vRegF src) %{ + match(Set dst (NegF src)); + + ins_cost(INSN_COST * 3); + format %{ "fneg $dst, $src" %} + + ins_encode %{ + __ fnegs(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_uop_s); +%} + +instruct negD_reg_reg(vRegD dst, vRegD src) %{ + match(Set dst (NegD src)); + + ins_cost(INSN_COST * 3); + format %{ "fnegd $dst, $src" %} + + ins_encode %{ + __ fnegd(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_uop_d); +%} + +instruct absF_reg(vRegF dst, vRegF src) %{ + match(Set dst (AbsF src)); + + ins_cost(INSN_COST * 3); + format %{ "fabss $dst, $src" %} + ins_encode %{ + __ fabss(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_uop_s); +%} + +instruct absD_reg(vRegD dst, vRegD src) %{ + match(Set dst (AbsD src)); + + ins_cost(INSN_COST * 3); + format %{ "fabsd $dst, $src" %} + ins_encode %{ + __ fabsd(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_uop_d); +%} + +instruct sqrtD_reg(vRegD dst, vRegD src) %{ + match(Set dst (SqrtD src)); + + ins_cost(INSN_COST * 50); + format %{ "fsqrtd $dst, $src" %} + ins_encode %{ + __ fsqrtd(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_div_s); +%} + +instruct sqrtF_reg(vRegF dst, vRegF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + ins_cost(INSN_COST * 50); + format %{ "fsqrts $dst, $src" %} + ins_encode %{ + __ fsqrts(as_FloatRegister($dst$$reg), + as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_div_d); +%} + +// ============================================================================ +// Logical Instructions + +// Integer Logical Instructions + +// And Instructions + + +instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{ + match(Set dst (AndI src1 src2)); + + format %{ "andw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ andw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{ + match(Set dst (AndI src1 src2)); + + format %{ "andsw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ andw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Or Instructions + +instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "orrw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode 
%{ + __ orrw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "orrw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ orrw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Xor Instructions + +instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "eorw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ eorw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "eorw $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ eorw(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Long Logical Instructions +// TODO + +instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{ + match(Set dst (AndL src1 src2)); + + format %{ "and $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{ + match(Set dst (AndL src1 src2)); + + format %{ "and $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Or Instructions + +instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (OrL src1 src2)); + + format %{ "orr $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ + match(Set dst (OrL src1 src2)); + + format %{ "orr $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +// Xor Instructions + +instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ + match(Set dst (XorL src1 src2)); + + format %{ "eor $dst, $src1, $src2\t# int" %} + + ins_cost(INSN_COST); + ins_encode %{ + __ eor(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ + match(Set dst (XorL src1 src2)); + + ins_cost(INSN_COST); + format %{ "eor $dst, $src1, $src2\t# int" %} + + ins_encode %{ + __ eor(as_Register($dst$$reg), + as_Register($src1$$reg), + (unsigned long)($src2$$constant)); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) +%{ + match(Set dst (ConvI2L src)); + + ins_cost(INSN_COST); + format %{ "sxtw $dst, $src\t# i2l" %} + ins_encode %{ + __ sbfm($dst$$Register, $src$$Register, 0, 31); + %} + ins_pipe(ialu_reg_shift); +%} + +// this pattern occurs in bigmath arithmetic +instruct convUI2L_reg_reg(iRegLNoSp dst, 
iRegIorL2I src, immL_32bits mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + ins_cost(INSN_COST); + format %{ "ubfm $dst, $src, 0, 31\t# ui2l" %} + ins_encode %{ + __ ubfm($dst$$Register, $src$$Register, 0, 31); + %} + + ins_pipe(ialu_reg_shift); +%} + +instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ + match(Set dst (ConvL2I src)); + + ins_cost(INSN_COST); + format %{ "movw $dst, $src \t// l2i" %} + + ins_encode %{ + __ movw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) +%{ + match(Set dst (Conv2B src)); + effect(KILL cr); + + format %{ + "cmpw $src, zr\n\t" + "cset $dst, ne" + %} + + ins_encode %{ + __ cmpw(as_Register($src$$reg), zr); + __ cset(as_Register($dst$$reg), Assembler::NE); + %} + + ins_pipe(ialu_reg); +%} + +instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr) +%{ + match(Set dst (Conv2B src)); + effect(KILL cr); + + format %{ + "cmp $src, zr\n\t" + "cset $dst, ne" + %} + + ins_encode %{ + __ cmp(as_Register($src$$reg), zr); + __ cset(as_Register($dst$$reg), Assembler::NE); + %} + + ins_pipe(ialu_reg); +%} + +instruct convD2F_reg(vRegF dst, vRegD src) %{ + match(Set dst (ConvD2F src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvtd $dst, $src \t// d2f" %} + + ins_encode %{ + __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_d2f); +%} + +instruct convF2D_reg(vRegD dst, vRegF src) %{ + match(Set dst (ConvF2D src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvts $dst, $src \t// f2d" %} + + ins_encode %{ + __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_f2d); +%} + +instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{ + match(Set dst (ConvF2I src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvtzsw $dst, $src \t// f2i" %} + + ins_encode %{ + __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_f2i); +%} + +instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{ + match(Set dst (ConvF2L src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvtzs $dst, $src \t// f2l" %} + + ins_encode %{ + __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_f2l); +%} + +instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{ + match(Set dst (ConvI2F src)); + + ins_cost(INSN_COST * 5); + format %{ "scvtfws $dst, $src \t// i2f" %} + + ins_encode %{ + __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(fp_i2f); +%} + +instruct convL2F_reg_reg(vRegF dst, iRegL src) %{ + match(Set dst (ConvL2F src)); + + ins_cost(INSN_COST * 5); + format %{ "scvtfs $dst, $src \t// l2f" %} + + ins_encode %{ + __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(fp_l2f); +%} + +instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{ + match(Set dst (ConvD2I src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvtzdw $dst, $src \t// d2i" %} + + ins_encode %{ + __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_d2i); +%} + +instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{ + match(Set dst (ConvD2L src)); + + ins_cost(INSN_COST * 5); + format %{ "fcvtzd $dst, $src \t// d2l" %} + + ins_encode %{ + __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_d2l); +%} + +instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{ + match(Set dst (ConvI2D src)); + + ins_cost(INSN_COST * 5); + format %{ "scvtfwd $dst, $src \t// i2d" %} + + ins_encode %{ + __ 
scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(fp_i2d); +%} + +instruct convL2D_reg_reg(vRegD dst, iRegL src) %{ + match(Set dst (ConvL2D src)); + + ins_cost(INSN_COST * 5); + format %{ "scvtfd $dst, $src \t// l2d" %} + + ins_encode %{ + __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(fp_l2d); +%} + +// stack <-> reg and reg <-> reg shuffles with no conversion + +instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ + + match(Set dst (MoveF2I src)); + + effect(DEF dst, USE src); + + ins_cost(4 * INSN_COST); + + format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %} + + ins_encode %{ + __ ldrw($dst$$Register, Address(sp, $src$$disp)); + %} + + ins_pipe(iload_reg_reg); + +%} + +instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{ + + match(Set dst (MoveI2F src)); + + effect(DEF dst, USE src); + + ins_cost(4 * INSN_COST); + + format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %} + + ins_encode %{ + __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + + ins_pipe(pipe_class_memory); + +%} + +instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ + + match(Set dst (MoveD2L src)); + + effect(DEF dst, USE src); + + ins_cost(4 * INSN_COST); + + format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %} + + ins_encode %{ + __ ldr($dst$$Register, Address(sp, $src$$disp)); + %} + + ins_pipe(iload_reg_reg); + +%} + +instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{ + + match(Set dst (MoveL2D src)); + + effect(DEF dst, USE src); + + ins_cost(4 * INSN_COST); + + format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %} + + ins_encode %{ + __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + + ins_pipe(pipe_class_memory); + +%} + +instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{ + + match(Set dst (MoveF2I src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %} + + ins_encode %{ + __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + + ins_pipe(pipe_class_memory); + +%} + +instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ + + match(Set dst (MoveI2F src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %} + + ins_encode %{ + __ strw($src$$Register, Address(sp, $dst$$disp)); + %} + + ins_pipe(istore_reg_reg); + +%} + +instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{ + + match(Set dst (MoveD2L src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %} + + ins_encode %{ + __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + + ins_pipe(pipe_class_memory); + +%} + +instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ + + match(Set dst (MoveL2D src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "str $src, $dst\t# MoveL2D_reg_stack" %} + + ins_encode %{ + __ str($src$$Register, Address(sp, $dst$$disp)); + %} + + ins_pipe(istore_reg_reg); + +%} + +instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{ + + match(Set dst (MoveF2I src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %} + + ins_encode %{ + __ fmovs($dst$$Register, as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_f2i); + +%} + +instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{ + + match(Set dst (MoveI2F src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %} + + 
ins_encode %{ + __ fmovs(as_FloatRegister($dst$$reg), $src$$Register); + %} + + ins_pipe(fp_i2f); + +%} + +instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{ + + match(Set dst (MoveD2L src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %} + + ins_encode %{ + __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); + %} + + ins_pipe(fp_d2l); + +%} + +instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{ + + match(Set dst (MoveL2D src)); + + effect(DEF dst, USE src); + + ins_cost(INSN_COST); + + format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %} + + ins_encode %{ + __ fmovd(as_FloatRegister($dst$$reg), $src$$Register); + %} + + ins_pipe(fp_l2d); + +%} + +// ============================================================================ +// clearing of an array + +instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +%{ + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base, KILL cr); + + ins_cost(4 * INSN_COST); + format %{ "ClearArray $cnt, $base" %} + + ins_encode %{ + __ zero_words($base$$Register, $cnt$$Register); + %} + + ins_pipe(pipe_class_memory); +%} + +instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr) +%{ + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, TEMP tmp, KILL cr); + + ins_cost(4 * INSN_COST); + format %{ "ClearArray $cnt, $base" %} + + ins_encode %{ + __ zero_words($base$$Register, (u_int64_t)$cnt$$constant); + %} + + ins_pipe(pipe_class_memory); +%} + +// ============================================================================ +// Overflow Math Instructions + +instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + + format %{ "cmnw $op1, $op2\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmnw($op1$$Register, $op2$$Register); + %} + + ins_pipe(icmp_reg_reg); +%} + +instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + + format %{ "cmnw $op1, $op2\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmnw($op1$$Register, $op2$$constant); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + + format %{ "cmn $op1, $op2\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmn($op1$$Register, $op2$$Register); + %} + + ins_pipe(icmp_reg_reg); +%} + +instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + + format %{ "cmn $op1, $op2\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmn($op1$$Register, $op2$$constant); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpw $op1, $op2\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmpw($op1$$Register, $op2$$Register); + %} + + ins_pipe(icmp_reg_reg); +%} + +instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpw $op1, $op2\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmpw($op1$$Register, $op2$$constant); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) +%{ + match(Set cr 
(OverflowSubL op1 op2)); + + format %{ "cmp $op1, $op2\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + + ins_pipe(icmp_reg_reg); +%} + +instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2) +%{ + match(Set cr (OverflowSubL op1 op2)); + + format %{ "cmp $op1, $op2\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmp($op1$$Register, $op2$$constant); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1) +%{ + match(Set cr (OverflowSubI zero op1)); + + format %{ "cmpw zr, $op1\t# overflow check int" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmpw(zr, $op1$$Register); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1) +%{ + match(Set cr (OverflowSubL zero op1)); + + format %{ "cmp zr, $op1\t# overflow check long" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmp(zr, $op1$$Register); + %} + + ins_pipe(icmp_reg_imm); +%} + +instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) +%{ + match(Set cr (OverflowMulI op1 op2)); + + format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t" + "cmp rscratch1, rscratch1, sxtw\n\t" + "movw rscratch1, #0x80000000\n\t" + "cselw rscratch1, rscratch1, zr, NE\n\t" + "cmpw rscratch1, #1" %} + ins_cost(5 * INSN_COST); + ins_encode %{ + __ smull(rscratch1, $op1$$Register, $op2$$Register); + __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow + __ movw(rscratch1, 0x80000000); // Develop 0 (EQ), + __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE) + __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS + %} + + ins_pipe(pipe_slow); +%} + +instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr) +%{ + match(If cmp (OverflowMulI op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow + || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); + effect(USE labl, KILL cr); + + format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t" + "cmp rscratch1, rscratch1, sxtw\n\t" + "b$cmp $labl" %} + ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ smull(rscratch1, $op1$$Register, $op2$$Register); + __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow + __ br(cond == Assembler::VS ? 
Assembler::NE : Assembler::EQ, *L); + %} + + ins_pipe(pipe_serial); +%} + +instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2) +%{ + match(Set cr (OverflowMulL op1 op2)); + + format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t" + "smulh rscratch2, $op1, $op2\n\t" + "cmp rscratch2, rscratch1, ASR #31\n\t" + "movw rscratch1, #0x80000000\n\t" + "cselw rscratch1, rscratch1, zr, NE\n\t" + "cmpw rscratch1, #1" %} + ins_cost(6 * INSN_COST); + ins_encode %{ + __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63 + __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127 + __ cmp(rscratch2, rscratch1, Assembler::ASR, 31); // Top is pure sign ext + __ movw(rscratch1, 0x80000000); // Develop 0 (EQ), + __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE) + __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS + %} + + ins_pipe(pipe_slow); +%} + +instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr) +%{ + match(If cmp (OverflowMulL op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow + || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); + effect(USE labl, KILL cr); + + format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t" + "smulh rscratch2, $op1, $op2\n\t" + "cmp rscratch2, rscratch1, ASR #31\n\t" + "b$cmp $labl" %} + ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63 + __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127 + __ cmp(rscratch2, rscratch1, Assembler::ASR, 31); // Top is pure sign ext + __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); + %} + + ins_pipe(pipe_serial); +%} + +// ============================================================================ +// Compare Instructions + +instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) +%{ + match(Set cr (CmpI op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, $op2" %} + + ins_encode(aarch64_enc_cmpw(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero) +%{ + match(Set cr (CmpI op1 zero)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, 0" %} + + ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2) +%{ + match(Set cr (CmpI op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, $op2" %} + + ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2) +%{ + match(Set cr (CmpI op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST * 2); + format %{ "cmpw $op1, $op2" %} + + ins_encode(aarch64_enc_cmpw_imm(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +// Unsigned compare Instructions; really, same as signed compare +// except it should only be used to feed an If or a CMovI which takes a +// cmpOpU. 
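+// Illustrative sketch, for explanation only (registers chosen arbitrarily):
+// the unsigned variants emit the same compare instruction as the signed ones,
+//   cmpw  w0, w1
+// and only the flag consumer differs, e.g.
+//   b.lt  L      // signed branch, selected through cmpOp
+//   b.lo  L      // unsigned branch, selected through cmpOpU
+// i.e. cmpOpU simply maps the test onto the unsigned NZCV conditions
+// (LO/LS/HI/HS rather than LT/LE/GT/GE).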
+ +instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2) +%{ + match(Set cr (CmpU op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, $op2\t# unsigned" %} + + ins_encode(aarch64_enc_cmpw(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero) +%{ + match(Set cr (CmpU op1 zero)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, #0\t# unsigned" %} + + ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2) +%{ + match(Set cr (CmpU op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmpw $op1, $op2\t# unsigned" %} + + ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2) +%{ + match(Set cr (CmpU op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST * 2); + format %{ "cmpw $op1, $op2\t# unsigned" %} + + ins_encode(aarch64_enc_cmpw_imm(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) +%{ + match(Set cr (CmpL op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero) +%{ + match(Set cr (CmpL op1 zero)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "tst $op1" %} + + ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2) +%{ + match(Set cr (CmpL op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2) +%{ + match(Set cr (CmpL op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST * 2); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp_imm(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2) +%{ + match(Set cr (CmpUL op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero) +%{ + match(Set cr (CmpUL op1 zero)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "tst $op1" %} + + ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2) +%{ + match(Set cr (CmpUL op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2) +%{ + match(Set cr (CmpUL op1 op2)); + + effect(DEF cr, USE op1); + + ins_cost(INSN_COST * 2); + format %{ "cmp $op1, $op2" %} + + ins_encode(aarch64_enc_cmp_imm(op1, op2)); + + ins_pipe(icmp_reg_imm); +%} + +instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2) +%{ + match(Set cr (CmpP op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2\t // ptr" %} + + 
ins_encode(aarch64_enc_cmpp(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2) +%{ + match(Set cr (CmpN op1 op2)); + + effect(DEF cr, USE op1, USE op2); + + ins_cost(INSN_COST); + format %{ "cmp $op1, $op2\t // compressed ptr" %} + + ins_encode(aarch64_enc_cmpn(op1, op2)); + + ins_pipe(icmp_reg_reg); +%} + +instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero) +%{ + match(Set cr (CmpP op1 zero)); + + effect(DEF cr, USE op1, USE zero); + + ins_cost(INSN_COST); + format %{ "cmp $op1, 0\t // ptr" %} + + ins_encode(aarch64_enc_testp(op1)); + + ins_pipe(icmp_reg_imm); +%} + +instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero) +%{ + match(Set cr (CmpN op1 zero)); + + effect(DEF cr, USE op1, USE zero); + + ins_cost(INSN_COST); + format %{ "cmp $op1, 0\t // compressed ptr" %} + + ins_encode(aarch64_enc_testn(op1)); + + ins_pipe(icmp_reg_imm); +%} + +// FP comparisons +// +// n.b. CmpF/CmpD set a normal flags reg which then gets compared +// using normal cmpOp. See declaration of rFlagsReg for details. + +instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2) +%{ + match(Set cr (CmpF src1 src2)); + + ins_cost(3 * INSN_COST); + format %{ "fcmps $src1, $src2" %} + + ins_encode %{ + __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + + ins_pipe(pipe_class_compare); +%} + +instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2) +%{ + match(Set cr (CmpF src1 src2)); + + ins_cost(3 * INSN_COST); + format %{ "fcmps $src1, 0.0" %} + + ins_encode %{ + __ fcmps(as_FloatRegister($src1$$reg), 0.0); + %} + + ins_pipe(pipe_class_compare); +%} +// FROM HERE + +instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2) +%{ + match(Set cr (CmpD src1 src2)); + + ins_cost(3 * INSN_COST); + format %{ "fcmpd $src1, $src2" %} + + ins_encode %{ + __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + + ins_pipe(pipe_class_compare); +%} + +instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2) +%{ + match(Set cr (CmpD src1 src2)); + + ins_cost(3 * INSN_COST); + format %{ "fcmpd $src1, 0.0" %} + + ins_encode %{ + __ fcmpd(as_FloatRegister($src1$$reg), 0.0); + %} + + ins_pipe(pipe_class_compare); +%} + +instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr) +%{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL cr); + + ins_cost(5 * INSN_COST); + format %{ "fcmps $src1, $src2\n\t" + "csinvw($dst, zr, zr, eq\n\t" + "csnegw($dst, $dst, $dst, lt)" + %} + + ins_encode %{ + Label done; + FloatRegister s1 = as_FloatRegister($src1$$reg); + FloatRegister s2 = as_FloatRegister($src2$$reg); + Register d = as_Register($dst$$reg); + __ fcmps(s1, s2); + // installs 0 if EQ else -1 + __ csinvw(d, zr, zr, Assembler::EQ); + // keeps -1 if less or unordered else installs 1 + __ csnegw(d, d, d, Assembler::LT); + __ bind(done); + %} + + ins_pipe(pipe_class_default); + +%} + +instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr) +%{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL cr); + + ins_cost(5 * INSN_COST); + format %{ "fcmpd $src1, $src2\n\t" + "csinvw($dst, zr, zr, eq\n\t" + "csnegw($dst, $dst, $dst, lt)" + %} + + ins_encode %{ + Label done; + FloatRegister s1 = as_FloatRegister($src1$$reg); + FloatRegister s2 = as_FloatRegister($src2$$reg); + Register d = as_Register($dst$$reg); + __ fcmpd(s1, s2); + // installs 0 if EQ else -1 + __ csinvw(d, zr, zr, Assembler::EQ); + // keeps -1 if less or unordered else installs 1 + __ csnegw(d, d, d, Assembler::LT); + __ 
bind(done); + %} + ins_pipe(pipe_class_default); + +%} + +instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr) +%{ + match(Set dst (CmpF3 src1 zero)); + effect(KILL cr); + + ins_cost(5 * INSN_COST); + format %{ "fcmps $src1, 0.0\n\t" + "csinvw($dst, zr, zr, eq\n\t" + "csnegw($dst, $dst, $dst, lt)" + %} + + ins_encode %{ + Label done; + FloatRegister s1 = as_FloatRegister($src1$$reg); + Register d = as_Register($dst$$reg); + __ fcmps(s1, 0.0); + // installs 0 if EQ else -1 + __ csinvw(d, zr, zr, Assembler::EQ); + // keeps -1 if less or unordered else installs 1 + __ csnegw(d, d, d, Assembler::LT); + __ bind(done); + %} + + ins_pipe(pipe_class_default); + +%} + +instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr) +%{ + match(Set dst (CmpD3 src1 zero)); + effect(KILL cr); + + ins_cost(5 * INSN_COST); + format %{ "fcmpd $src1, 0.0\n\t" + "csinvw($dst, zr, zr, eq\n\t" + "csnegw($dst, $dst, $dst, lt)" + %} + + ins_encode %{ + Label done; + FloatRegister s1 = as_FloatRegister($src1$$reg); + Register d = as_Register($dst$$reg); + __ fcmpd(s1, 0.0); + // installs 0 if EQ else -1 + __ csinvw(d, zr, zr, Assembler::EQ); + // keeps -1 if less or unordered else installs 1 + __ csnegw(d, d, d, Assembler::LT); + __ bind(done); + %} + ins_pipe(pipe_class_default); + +%} + +// Manifest a CmpL result in an integer register. +// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0) +instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags) +%{ + match(Set dst (CmpL3 src1 src2)); + effect(KILL flags); + + ins_cost(INSN_COST * 6); + format %{ + "cmp $src1, $src2" + "csetw $dst, ne" + "cnegw $dst, lt" + %} + // format %{ "CmpL3 $dst, $src1, $src2" %} + ins_encode %{ + __ cmp($src1$$Register, $src2$$Register); + __ csetw($dst$$Register, Assembler::NE); + __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr) +%{ + match(Set dst (CmpLTMask p q)); + effect(KILL cr); + + ins_cost(3 * INSN_COST); + + format %{ "cmpw $p, $q\t# cmpLTMask\n\t" + "csetw $dst, lt\n\t" + "subw $dst, zr, $dst" + %} + + ins_encode %{ + __ cmpw(as_Register($p$$reg), as_Register($q$$reg)); + __ csetw(as_Register($dst$$reg), Assembler::LT); + __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) +%{ + match(Set dst (CmpLTMask src zero)); + effect(KILL cr); + + ins_cost(INSN_COST); + + format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %} + + ins_encode %{ + __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31); + %} + + ins_pipe(ialu_reg_shift); +%} + +// ============================================================================ +// Max and Min + +instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr) +%{ + match(Set dst (MinI src1 src2)); + + effect(DEF dst, USE src1, USE src2, KILL cr); + size(8); + + ins_cost(INSN_COST * 3); + format %{ + "cmpw $src1 $src2\t signed int\n\t" + "cselw $dst, $src1, $src2 lt\t" + %} + + ins_encode %{ + __ cmpw(as_Register($src1$$reg), + as_Register($src2$$reg)); + __ cselw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LT); + %} + + ins_pipe(ialu_reg_reg); +%} +// FROM HERE + +instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr) +%{ + match(Set dst (MaxI src1 src2)); + + effect(DEF dst, USE src1, USE src2, KILL 
cr); + size(8); + + ins_cost(INSN_COST * 3); + format %{ + "cmpw $src1 $src2\t signed int\n\t" + "cselw $dst, $src1, $src2 gt\t" + %} + + ins_encode %{ + __ cmpw(as_Register($src1$$reg), + as_Register($src2$$reg)); + __ cselw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::GT); + %} + + ins_pipe(ialu_reg_reg); +%} + +// ============================================================================ +// Branch Instructions + +// Direct Branch. +instruct branch(label lbl) +%{ + match(Goto); + + effect(USE lbl); + + ins_cost(BRANCH_COST); + format %{ "b $lbl" %} + + ins_encode(aarch64_enc_b(lbl)); + + ins_pipe(pipe_branch); +%} + +// Conditional Near Branch +instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl) +%{ + // Same match rule as `branchConFar'. + match(If cmp cr); + + effect(USE lbl); + + ins_cost(BRANCH_COST); + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + // ins_short_branch(1); + format %{ "b$cmp $lbl" %} + + ins_encode(aarch64_enc_br_con(cmp, lbl)); + + ins_pipe(pipe_branch_cond); +%} + +// Conditional Near Branch Unsigned +instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl) +%{ + // Same match rule as `branchConFar'. + match(If cmp cr); + + effect(USE lbl); + + ins_cost(BRANCH_COST); + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + // ins_short_branch(1); + format %{ "b$cmp $lbl\t# unsigned" %} + + ins_encode(aarch64_enc_br_conU(cmp, lbl)); + + ins_pipe(pipe_branch_cond); +%} + +// Make use of CBZ and CBNZ. These instructions, as well as being +// shorter than (cmp; branch), have the additional benefit of not +// killing the flags. 
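+// Illustrative sketch, for explanation only (registers and label are
+// arbitrary): a test such as
+//   if (x == 0) goto L;     // x an int held in w0
+// can be emitted as the single, flag-preserving instruction
+//   cbz   w0, L
+// instead of the two-instruction sequence
+//   cmp   w0, #0
+//   b.eq  L
+// which also clobbers the condition flags.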
+ +instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{ + match(If cmp (CmpI op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cbw$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbzw($op1$$Register, *L); + else + __ cbnzw($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{ + match(If cmp (CmpL op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbz($op1$$Register, *L); + else + __ cbnz($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{ + match(If cmp (CmpP op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbz($op1$$Register, *L); + else + __ cbnz($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{ + match(If cmp (CmpN op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cbw$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbzw($op1$$Register, *L); + else + __ cbnzw($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{ + match(If cmp (CmpP (DecodeN oop) zero)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $oop, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbzw($oop$$Register, *L); + else + __ cbnzw($oop$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct cmpUI_imm0_branch(cmpOpU cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{ + match(If cmp (CmpU op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq + || n->in(1)->as_Bool()->_test._test == BoolTest::gt + || n->in(1)->as_Bool()->_test._test == BoolTest::le); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cbw$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ || cond == Assembler::LS) + __ cbzw($op1$$Register, *L); + else + __ cbnzw($op1$$Register, *L); + %} 
+ ins_pipe(pipe_cmp_branch); +%} + +instruct cmpUL_imm0_branch(cmpOpU cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{ + match(If cmp (CmpUL op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq + || n->in(1)->as_Bool()->_test._test == BoolTest::gt + || n->in(1)->as_Bool()->_test._test == BoolTest::le); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ || cond == Assembler::LS) + __ cbz($op1$$Register, *L); + else + __ cbnz($op1$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + +// Test bit and Branch + +// Patterns for short (< 32KiB) variants +instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{ + match(If cmp (CmpL op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt + || n->in(1)->as_Bool()->_test._test == BoolTest::ge); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl # long" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = + ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ; + __ tbr(cond, $op1$$Register, 63, *L); + %} + ins_pipe(pipe_cmp_branch); + ins_short_branch(1); +%} + +instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{ + match(If cmp (CmpI op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt + || n->in(1)->as_Bool()->_test._test == BoolTest::ge); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl # int" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = + ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? 
Assembler::NE : Assembler::EQ; + __ tbr(cond, $op1$$Register, 31, *L); + %} + ins_pipe(pipe_cmp_branch); + ins_short_branch(1); +%} + +instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{ + match(If cmp (CmpL (AndL op1 op2) op3)); + predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq) + && is_power_of_2(n->in(2)->in(1)->in(2)->get_long())); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "tb$cmp $op1, $op2, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + int bit = exact_log2($op2$$constant); + __ tbr(cond, $op1$$Register, bit, *L); + %} + ins_pipe(pipe_cmp_branch); + ins_short_branch(1); +%} + +instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{ + match(If cmp (CmpI (AndI op1 op2) op3)); + predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq) + && is_power_of_2(n->in(2)->in(1)->in(2)->get_int())); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "tb$cmp $op1, $op2, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + int bit = exact_log2($op2$$constant); + __ tbr(cond, $op1$$Register, bit, *L); + %} + ins_pipe(pipe_cmp_branch); + ins_short_branch(1); +%} + +// And far variants +instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{ + match(If cmp (CmpL op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt + || n->in(1)->as_Bool()->_test._test == BoolTest::ge); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl # long" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = + ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ; + __ tbr(cond, $op1$$Register, 63, *L, /*far*/true); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{ + match(If cmp (CmpI op1 op2)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt + || n->in(1)->as_Bool()->_test._test == BoolTest::ge); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $op1, $labl # int" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = + ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? 
Assembler::NE : Assembler::EQ; + __ tbr(cond, $op1$$Register, 31, *L, /*far*/true); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{ + match(If cmp (CmpL (AndL op1 op2) op3)); + predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq) + && is_power_of_2(n->in(2)->in(1)->in(2)->get_long())); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "tb$cmp $op1, $op2, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + int bit = exact_log2($op2$$constant); + __ tbr(cond, $op1$$Register, bit, *L, /*far*/true); + %} + ins_pipe(pipe_cmp_branch); +%} + +instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{ + match(If cmp (CmpI (AndI op1 op2) op3)); + predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq) + && is_power_of_2(n->in(2)->in(1)->in(2)->get_int())); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "tb$cmp $op1, $op2, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + int bit = exact_log2($op2$$constant); + __ tbr(cond, $op1$$Register, bit, *L, /*far*/true); + %} + ins_pipe(pipe_cmp_branch); +%} + +// Test bits + +instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{ + match(Set cr (CmpL (AndL op1 op2) op3)); + predicate(Assembler::operand_valid_for_logical_immediate + (/*is_32*/false, n->in(1)->in(2)->get_long())); + + ins_cost(INSN_COST); + format %{ "tst $op1, $op2 # long" %} + ins_encode %{ + __ tst($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{ + match(Set cr (CmpI (AndI op1 op2) op3)); + predicate(Assembler::operand_valid_for_logical_immediate + (/*is_32*/true, n->in(1)->in(2)->get_int())); + + ins_cost(INSN_COST); + format %{ "tst $op1, $op2 # int" %} + ins_encode %{ + __ tstw($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{ + match(Set cr (CmpL (AndL op1 op2) op3)); + + ins_cost(INSN_COST); + format %{ "tst $op1, $op2 # long" %} + ins_encode %{ + __ tst($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{ + match(Set cr (CmpI (AndI op1 op2) op3)); + + ins_cost(INSN_COST); + format %{ "tstw $op1, $op2 # int" %} + ins_encode %{ + __ tstw($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + + +// Conditional Far Branch +// Conditional Far Branch Unsigned +// TODO: fixme + +// counted loop end branch near +instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl) +%{ + match(CountedLoopEnd cmp cr); + + effect(USE lbl); + + ins_cost(BRANCH_COST); + // short variant. + // ins_short_branch(1); + format %{ "b$cmp $lbl \t// counted loop end" %} + + ins_encode(aarch64_enc_br_con(cmp, lbl)); + + ins_pipe(pipe_branch); +%} + +// counted loop end branch near Unsigned +instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl) +%{ + match(CountedLoopEnd cmp cr); + + effect(USE lbl); + + ins_cost(BRANCH_COST); + // short variant. 
+ // ins_short_branch(1); + format %{ "b$cmp $lbl \t// counted loop end unsigned" %} + + ins_encode(aarch64_enc_br_conU(cmp, lbl)); + + ins_pipe(pipe_branch); +%} + +// counted loop end branch far +// counted loop end branch far unsigned +// TODO: fixme + +// ============================================================================ +// inlined locking and unlocking + +instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +%{ + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP tmp2); + + // TODO + // identify correct cost + ins_cost(5 * INSN_COST); + format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %} + + ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2)); + + ins_pipe(pipe_serial); +%} + +instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +%{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP tmp2); + + ins_cost(5 * INSN_COST); + format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %} + + ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2)); + + ins_pipe(pipe_serial); +%} + + +// ============================================================================ +// Safepoint Instructions + +// TODO +// provide a near and far version of this code + +instruct safePoint(rFlagsReg cr, iRegP poll) +%{ + match(SafePoint poll); + effect(KILL cr); + + format %{ + "ldrw zr, [$poll]\t# Safepoint: poll for GC" + %} + ins_encode %{ + __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type); + %} + ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + + +// ============================================================================ +// Procedure Call/Return Instructions + +// Call Java Static Instruction + +instruct CallStaticJavaDirect(method meth) +%{ + match(CallStaticJava); + + effect(USE meth); + + predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke()); + + ins_cost(CALL_COST); + + format %{ "call,static $meth \t// ==> " %} + + ins_encode( aarch64_enc_java_static_call(meth), + aarch64_enc_call_epilog ); + + ins_pipe(pipe_class_call); +%} + +// TO HERE + +// Call Java Static Instruction (method handle version) + +instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save) +%{ + match(CallStaticJava); + + effect(USE meth); + + predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); + + ins_cost(CALL_COST); + + format %{ "call,static $meth \t// (methodhandle) ==> " %} + + ins_encode( aarch64_enc_java_handle_call(meth), + aarch64_enc_call_epilog ); + + ins_pipe(pipe_class_call); +%} + +// Call Java Dynamic Instruction +instruct CallDynamicJavaDirect(method meth) +%{ + match(CallDynamicJava); + + effect(USE meth); + + ins_cost(CALL_COST); + + format %{ "CALL,dynamic $meth \t// ==> " %} + + ins_encode( aarch64_enc_java_dynamic_call(meth), + aarch64_enc_call_epilog ); + + ins_pipe(pipe_class_call); +%} + +// Call Runtime Instruction + +instruct CallRuntimeDirect(method meth) +%{ + match(CallRuntime); + + effect(USE meth); + + ins_cost(CALL_COST); + + format %{ "CALL, runtime $meth" %} + + ins_encode( aarch64_enc_java_to_runtime(meth) ); + + ins_pipe(pipe_class_call); +%} + +// Call Runtime Instruction + +instruct CallLeafDirect(method meth) +%{ + match(CallLeaf); + + effect(USE meth); + + ins_cost(CALL_COST); + + format %{ "CALL, runtime leaf $meth" %} + + ins_encode( aarch64_enc_java_to_runtime(meth) ); + + ins_pipe(pipe_class_call); +%} + +// Call Runtime Instruction + +instruct CallLeafNoFPDirect(method meth) +%{ + match(CallLeafNoFP); + 
+ effect(USE meth); + + ins_cost(CALL_COST); + + format %{ "CALL, runtime leaf nofp $meth" %} + + ins_encode( aarch64_enc_java_to_runtime(meth) ); + + ins_pipe(pipe_class_call); +%} + +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) +%{ + match(TailCall jump_target method_oop); + + ins_cost(CALL_COST); + + format %{ "br $jump_target\t# $method_oop holds method oop" %} + + ins_encode(aarch64_enc_tail_call(jump_target)); + + ins_pipe(pipe_class_call); +%} + +instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop) +%{ + match(TailJump jump_target ex_oop); + + ins_cost(CALL_COST); + + format %{ "br $jump_target\t# $ex_oop holds exception oop" %} + + ins_encode(aarch64_enc_tail_jmp(jump_target)); + + ins_pipe(pipe_class_call); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +// TODO check +// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1 +instruct CreateException(iRegP_R0 ex_oop) +%{ + match(Set ex_oop (CreateEx)); + + format %{ " -- \t// exception oop; no code emitted" %} + + size(0); + + ins_encode( /*empty*/ ); + + ins_pipe(pipe_class_empty); +%} + +// Rethrow exception: The exception oop will come in the first +// argument position. Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() %{ + match(Rethrow); + ins_cost(CALL_COST); + + format %{ "b rethrow_stub" %} + + ins_encode( aarch64_enc_rethrow() ); + + ins_pipe(pipe_class_call); +%} + + +// Return Instruction +// epilog node loads ret address into lr as part of frame pop +instruct Ret() +%{ + match(Return); + + format %{ "ret\t// return register" %} + + ins_encode( aarch64_enc_ret() ); + + ins_pipe(pipe_branch); +%} + +// Die now. +instruct ShouldNotReachHere() %{ + match(Halt); + + ins_cost(CALL_COST); + format %{ "ShouldNotReachHere" %} + + ins_encode %{ + // TODO + // implement proper trap call here + __ brk(999); + %} + + ins_pipe(pipe_class_default); +%} + +// ============================================================================ +// Partial Subtype Check +// +// superklass array for an instance of the superklass. Set a hidden +// internal cache on a hit (cache is checked with exposed code in +// gen_subtype_check()). Return NZ for a miss or zero for a hit. The +// encoding ALSO sets flags. 
+ +instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL cr, KILL temp); + + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck $result, $sub, $super" %} + + ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); + + opcode(0x1); // Force zero of result reg on hit + + ins_pipe(pipe_class_memory); +%} + +instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) +%{ + match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); + effect(KILL temp, KILL result); + + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck $result, $sub, $super == 0" %} + + ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); + + opcode(0x0); // Don't zero result reg on hit + + ins_pipe(pipe_class_memory); +%} + +instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, + iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr) +%{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} + ins_encode %{ + __ string_compare($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, $result$$Register, + $tmp1$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2, + iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %} + + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + -1, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, + immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %} + + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, zr, + $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, + icnt2, $result$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, + iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) +%{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + + format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp" %} + ins_encode %{ + __ string_equals($str1$$Register, $str2$$Register, + $cnt$$Register, $result$$Register, + $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, + 
iRegP_R10 tmp, rFlagsReg cr) +%{ + match(Set result (AryEq ary1 ary2)); + effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr); + + format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} + ins_encode %{ + __ char_arrays_equals($ary1$$Register, $ary2$$Register, + $result$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} + +// encode char[] to byte[] in ISO_8859_1 +instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, + vRegD_V0 Vtmp1, vRegD_V1 Vtmp2, + vRegD_V2 Vtmp3, vRegD_V3 Vtmp4, + iRegI_R0 result, rFlagsReg cr) +%{ + match(Set result (EncodeISOArray src (Binary dst len))); + effect(USE_KILL src, USE_KILL dst, USE_KILL len, + KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); + + format %{ "Encode array $src,$dst,$len -> $result" %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister, + $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister); + %} + ins_pipe( pipe_class_memory ); +%} + +// ============================================================================ +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(thread_RegP dst) +%{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + + format %{ " -- \t// $dst=Thread::current(), empty" %} + + size(0); + + ins_encode( /*empty*/ ); + + ins_pipe(pipe_class_empty); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vector (32 bits) +instruct loadV4(vecD dst, vmem4 mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} + ins_encode( aarch64_enc_ldrvS(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Load vector (64 bits) +instruct loadV8(vecD dst, vmem8 mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} + ins_encode( aarch64_enc_ldrvD(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Load Vector (128 bits) +instruct loadV16(vecX dst, vmem16 mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} + ins_encode( aarch64_enc_ldrvQ(dst, mem) ); + ins_pipe(vload_reg_mem128); +%} + +// Store Vector (32 bits) +instruct storeV4(vecD src, vmem4 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strs $mem,$src\t# vector (32 bits)" %} + ins_encode( aarch64_enc_strvS(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Store Vector (64 bits) +instruct storeV8(vecD src, vmem8 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strd $mem,$src\t# vector (64 bits)" %} + ins_encode( aarch64_enc_strvD(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +// Store Vector (128 bits) +instruct storeV16(vecX src, vmem16 mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strq $mem,$src\t# vector (128 bits)" %} + ins_encode( aarch64_enc_strvQ(src, mem) ); + ins_pipe(vstore_reg_mem128); +%} + +instruct replicate8B(vecD dst, iRegIorL2I src) +%{ + 
predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate16B(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate8B_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate16B_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(16B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate4S(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate8S(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate4S_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate8S_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2I(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg64); +%} + +instruct replicate4I(vecX dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate2I_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# 
vector(2I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); + %} + ins_pipe(vmovi_reg_imm64); +%} + +instruct replicate4I_imm(vecX dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2L(vecX dst, iRegL src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); + %} + ins_pipe(vdup_reg_reg128); +%} + +instruct replicate2L_zero(vecX dst, immI0 zero) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector(4I)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg)); + %} + ins_pipe(vmovi_reg_imm128); +%} + +instruct replicate2F(vecD dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_freg64); +%} + +instruct replicate4F(vecX dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_freg128); +%} + +instruct replicate2D(vecX dst, vRegD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(vdup_reg_dreg128); +%} + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +instruct vadd8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd8S(vecX dst, vecX src1, vecX src2) +%{ + 
predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vadd4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vadd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vadd4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vadd2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct vsub8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + 
format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vsub4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vsub2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp64); +%} + +instruct vsub4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +instruct vsub2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop_fp128); +%} + +// --------------------------------- MUL -------------------------------------- + +instruct vmul4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmul64); +%} + +instruct vmul8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ 
mulv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmul128); +%} + +instruct vmul2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmul64); +%} + +instruct vmul4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmul128); +%} + +instruct vmul2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp64); +%} + +instruct vmul4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp128); +%} + +instruct vmul2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp128); +%} + +// --------------------------------- MLA -------------------------------------- + +instruct vmla4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (MulVS src1 src2))); + ins_cost(INSN_COST); + format %{ "mlav $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mlav(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla64); +%} + +instruct vmla8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (MulVS src1 src2))); + ins_cost(INSN_COST); + format %{ "mlav $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ mlav(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla128); +%} + +instruct vmla2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst (MulVI src1 src2))); + ins_cost(INSN_COST); + format %{ "mlav $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mlav(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla64); +%} + +instruct vmla4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (MulVI src1 src2))); + ins_cost(INSN_COST); + format %{ "mlav $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ mlav(as_FloatRegister($dst$$reg), __ T4S, + 
as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla128); +%} + +// --------------------------------- MLS -------------------------------------- + +instruct vmls4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS dst (MulVS src1 src2))); + ins_cost(INSN_COST); + format %{ "mlsv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mlsv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla64); +%} + +instruct vmls8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS dst (MulVS src1 src2))); + ins_cost(INSN_COST); + format %{ "mlsv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ mlsv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla128); +%} + +instruct vmls2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI dst (MulVI src1 src2))); + ins_cost(INSN_COST); + format %{ "mlsv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mlsv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla64); +%} + +instruct vmls4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI dst (MulVI src1 src2))); + ins_cost(INSN_COST); + format %{ "mlsv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ mlsv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmla128); +%} + +// --------------------------------- DIV -------------------------------------- + +instruct vdiv2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp64); +%} + +instruct vdiv4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp128); +%} + +instruct vdiv2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vmuldiv_fp128); +%} + +// --------------------------------- AND -------------------------------------- + +instruct vand8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vlogical64); +%} + +instruct vand16B(vecX dst, vecX src1, vecX src2) +%{ + 
  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "and $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ andr(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "orr $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ orr(as_FloatRegister($dst$$reg), __ T8B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical64);
+%}
+
+instruct vor16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "orr $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ orr(as_FloatRegister($dst$$reg), __ T16B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+instruct vxor8B(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 4 ||
+            n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "xor $dst,$src1,$src2\t# vector (8B)" %}
+  ins_encode %{
+    __ eor(as_FloatRegister($dst$$reg), __ T8B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical64);
+%}
+
+instruct vxor16B(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  ins_cost(INSN_COST);
+  format %{ "xor $dst,$src1,$src2\t# vector (16B)" %}
+  ins_encode %{
+    __ eor(as_FloatRegister($dst$$reg), __ T16B,
+           as_FloatRegister($src1$$reg),
+           as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(vlogical128);
+%}
+
+// ------------------------------ Shift ---------------------------------------
+instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
+  ins_encode %{
+    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(vdup_reg_reg64);
+%}
+
+instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (LShiftCntV cnt));
+  match(Set dst (RShiftCntV cnt));
+  format %{ "dup $dst, $cnt\t# shift count vector (16B)" %}
+  ins_encode %{
+    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
+  %}
+  ins_pipe(vdup_reg_reg128);
+%}
+
+instruct vsll8B(vecD dst, vecD src, vecD shift) %{
+  predicate(n->as_Vector()->length() == 4 ||
+            n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (8B)" %}
+  ins_encode %{
+    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src$$reg),
+            as_FloatRegister($shift$$reg));
+  %}
+  ins_pipe(vshift64);
+%}
+
+instruct vsll16B(vecX dst, vecX src, vecX shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVB src shift));
+  ins_cost(INSN_COST);
+  format %{ "sshl $dst,$src,$shift\t# vector (16B)" %}
+  ins_encode %{
+    __ 
sshl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift128); +%} + +// Right shifts with vector shift count on aarch64 SIMD are implemented +// as left shift by negative shift count. +// There are two cases for vector shift count. +// +// Case 1: The vector shift count is from replication. +// | | +// LoadVector RShiftCntV +// | / +// RShiftVI +// Note: In inner loop, multiple neg instructions are used, which can be +// moved to outer loop and merge into one neg instruction. +// +// Case 2: The vector shift count is from loading. +// This case isn't supported by middle-end now. But it's supported by +// panama/vectorIntrinsics(JEP 338: Vector API). +// | | +// LoadVector LoadVector +// | / +// RShiftVI +// + +instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (8B)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (16B)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (8B)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (16B)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 
16); + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsll4S(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsll8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (4H)" %} + 
ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (8H)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (4H)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (8H)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, 
$shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsll2I(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (2S)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (4S)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (2S)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T2S, + 
as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift64); +%} + +instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (4S)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2D)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "sshl $dst,$src,$tmp\t# vector (2D)" %} + ins_encode %{ + __ 
negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ sshl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + ins_cost(INSN_COST); + effect(TEMP tmp); + format %{ "negr $tmp,$shift\t" + "ushl $dst,$src,$tmp\t# vector (2D)" %} + ins_encode %{ + __ negr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($shift$$reg)); + __ ushl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($tmp$$reg)); + %} + ins_pipe(vshift128); +%} + +instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 63); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 63); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 63); + %} + ins_pipe(vshift128_imm); +%} + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. 
+// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == RAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(iRegINoSp dst, iRegI src) +// %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr) +// %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_iReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) ); +// %} +// + +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. +// +// peephole +// %{ +// peepmatch (incI_iReg movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (decI_iReg movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addI_iReg_imm movI); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (incL_iReg movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (decL_iReg movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addL_iReg_imm movL); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); +// %} + +// peephole +// %{ +// peepmatch (addP_iReg_imm movP); +// peepconstraint (0.dst == 1.dst); +// peepreplace (leaP_iReg_imm(0.dst 1.src 0.src)); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, iRegI src) +// %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(iRegINoSp dst, memory mem) +// %{ +// match(Set dst (LoadI mem)); +// %} +// + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + +// Local Variables: +// mode: c++ +// End: --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/aarch64Test.cpp 2021-01-25 19:31:23.812340635 +0000 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014, 2020, Red Hat Inc. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include + +#include "precompiled.hpp" +#include "code/codeBlob.hpp" +#include "asm/macroAssembler.hpp" + +// hook routine called during JVM bootstrap to test AArch64 assembler + +extern "C" void entry(CodeBuffer*); + +#ifdef ASSERT +void aarch64TestHook() +{ + BufferBlob* b = BufferBlob::create("aarch64Test", 500000); + CodeBuffer code(b); + entry(&code); + BufferBlob::free(b); +} +#endif --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/aarch64_ad.m4 2021-01-25 19:31:24.252345260 +0000 @@ -0,0 +1,371 @@ +dnl Copyright (c) 2014, Red Hat Inc. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. +dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +dnl +dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic +dnl and shift patterns patterns used in aarch64.ad. +dnl +// BEGIN This section of the file is automatically generated. Do not edit -------------- +dnl +define(`ORL2I', `ifelse($1,I,orL2I)') +dnl +define(`BASE_SHIFT_INSN', +` +instruct $2$1_reg_$4_reg(iReg$1NoSp dst, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, + immI src3, rFlagsReg cr) %{ + match(Set dst ($2$1 src1 ($4$1 src2 src3))); + + ins_cost(1.9 * INSN_COST); + format %{ "$3 $dst, $src1, $src2, $5 $src3" %} + + ins_encode %{ + __ $3(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::$5, + $src3$$constant & ifelse($1,I,0x1f,0x3f)); + %} + + ins_pipe(ialu_reg_reg_shift); +%}')dnl +define(`BASE_INVERTED_INSN', +` +instruct $2$1_reg_not_reg(iReg$1NoSp dst, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_M1 m1, + rFlagsReg cr) %{ +dnl This ifelse is because hotspot reassociates (xor (xor ..)..) +dnl into this canonical form. 
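+dnl Note: the inverted operand appears in the ideal graph as an xor with -1,
+dnl so these patterns map (And src1 (Xor src2 -1)) and the Or/Xor equivalents
+dnl onto the single-instruction bic, orn and eon forms.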
+ ifelse($2,Xor, + match(Set dst (Xor$1 m1 (Xor$1 src2 src1)));, + match(Set dst ($2$1 src1 (Xor$1 src2 m1)));) + ins_cost(INSN_COST); + format %{ "$3 $dst, $src1, $src2" %} + + ins_encode %{ + __ $3(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg_reg); +%}')dnl +define(`INVERTED_SHIFT_INSN', +` +instruct $2$1_reg_$4_not_reg(iReg$1NoSp dst, + iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, + immI src3, imm$1_M1 src4, rFlagsReg cr) %{ +dnl This ifelse is because hotspot reassociates (xor (xor ..)..) +dnl into this canonical form. + ifelse($2,Xor, + match(Set dst ($2$1 src4 (Xor$1($4$1 src2 src3) src1)));, + match(Set dst ($2$1 src1 (Xor$1($4$1 src2 src3) src4)));) + ins_cost(1.9 * INSN_COST); + format %{ "$3 $dst, $src1, $src2, $5 $src3" %} + + ins_encode %{ + __ $3(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg), + Assembler::$5, + $src3$$constant & ifelse($1,I,0x1f,0x3f)); + %} + + ins_pipe(ialu_reg_reg_shift); +%}')dnl +define(`NOT_INSN', +`instruct reg$1_not_reg(iReg$1NoSp dst, + iReg$1`'ORL2I($1) src1, imm$1_M1 m1, + rFlagsReg cr) %{ + match(Set dst (Xor$1 src1 m1)); + ins_cost(INSN_COST); + format %{ "$2 $dst, $src1, zr" %} + + ins_encode %{ + __ $2(as_Register($dst$$reg), + as_Register($src1$$reg), + zr, + Assembler::LSL, 0); + %} + + ins_pipe(ialu_reg); +%}')dnl +dnl +define(`BOTH_SHIFT_INSNS', +`BASE_SHIFT_INSN(I, $1, ifelse($2,andr,andw,$2w), $3, $4) +BASE_SHIFT_INSN(L, $1, $2, $3, $4)')dnl +dnl +define(`BOTH_INVERTED_INSNS', +`BASE_INVERTED_INSN(I, $1, $2w, $3, $4) +BASE_INVERTED_INSN(L, $1, $2, $3, $4)')dnl +dnl +define(`BOTH_INVERTED_SHIFT_INSNS', +`INVERTED_SHIFT_INSN(I, $1, $2w, $3, $4, ~0, int) +INVERTED_SHIFT_INSN(L, $1, $2, $3, $4, ~0l, long)')dnl +dnl +define(`ALL_SHIFT_KINDS', +`BOTH_SHIFT_INSNS($1, $2, URShift, LSR) +BOTH_SHIFT_INSNS($1, $2, RShift, ASR) +BOTH_SHIFT_INSNS($1, $2, LShift, LSL)')dnl +dnl +define(`ALL_INVERTED_SHIFT_KINDS', +`BOTH_INVERTED_SHIFT_INSNS($1, $2, URShift, LSR) +BOTH_INVERTED_SHIFT_INSNS($1, $2, RShift, ASR) +BOTH_INVERTED_SHIFT_INSNS($1, $2, LShift, LSL)')dnl +dnl +NOT_INSN(L, eon) +NOT_INSN(I, eonw) +BOTH_INVERTED_INSNS(And, bic) +BOTH_INVERTED_INSNS(Or, orn) +BOTH_INVERTED_INSNS(Xor, eon) +ALL_INVERTED_SHIFT_KINDS(And, bic) +ALL_INVERTED_SHIFT_KINDS(Xor, eon) +ALL_INVERTED_SHIFT_KINDS(Or, orn) +ALL_SHIFT_KINDS(And, andr) +ALL_SHIFT_KINDS(Xor, eor) +ALL_SHIFT_KINDS(Or, orr) +ALL_SHIFT_KINDS(Add, add) +ALL_SHIFT_KINDS(Sub, sub) +dnl +dnl EXTEND mode, rshift_op, src, lshift_count, rshift_count +define(`EXTEND', `($2$1 (LShift$1 $3 $4) $5)') +define(`BFM_INSN',` +// Shift Left followed by Shift Right. +// This idiom is used by the compiler for the i2b bytecode etc. +instruct $4$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI lshift_count, immI rshift_count) +%{ + match(Set dst EXTEND($1, $3, src, lshift_count, rshift_count)); + // Make sure we are not going to exceed what $4 can do. 
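+  // The lshift and rshift counts are folded into the immr and imms fields
+  // of a single $4, so the whole shift pair costs one instruction.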
+ predicate((unsigned int)n->in(2)->get_int() <= $2 + && (unsigned int)n->in(1)->in(2)->get_int() <= $2); + + ins_cost(INSN_COST * 2); + format %{ "$4 $dst, $src, $rshift_count - $lshift_count, #$2 - $lshift_count" %} + ins_encode %{ + int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant; + int s = $2 - lshift; + int r = (rshift - lshift) & $2; + __ $4(as_Register($dst$$reg), + as_Register($src$$reg), + r, s); + %} + + ins_pipe(ialu_reg_shift); +%}') +BFM_INSN(L, 63, RShift, sbfm) +BFM_INSN(I, 31, RShift, sbfmw) +BFM_INSN(L, 63, URShift, ubfm) +BFM_INSN(I, 31, URShift, ubfmw) +dnl +// Bitfield extract with shift & mask +define(`BFX_INSN', +`instruct $3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src, immI rshift, imm$1_bitmask mask) +%{ + match(Set dst (And$1 ($2$1 src rshift) mask)); + // Make sure we are not going to exceed what $3 can do. + predicate((exact_log2$6(n->in(2)->get_$5() + 1) + (n->in(1)->in(2)->get_int() & $4)) <= ($4 + 1)); + + ins_cost(INSN_COST); + format %{ "$3 $dst, $src, $mask" %} + ins_encode %{ + int rshift = $rshift$$constant & $4; + long mask = $mask$$constant; + int width = exact_log2$6(mask+1); + __ $3(as_Register($dst$$reg), + as_Register($src$$reg), rshift, width); + %} + ins_pipe(ialu_reg_shift); +%}') +BFX_INSN(I, URShift, ubfxw, 31, int) +BFX_INSN(L, URShift, ubfx, 63, long, _long) + +// We can use ubfx when extending an And with a mask when we know mask +// is positive. We know that because immI_bitmask guarantees it. +instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask) +%{ + match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask))); + // Make sure we are not going to exceed what ubfxw can do. + predicate((exact_log2(n->in(1)->in(2)->get_int() + 1) + (n->in(1)->in(1)->in(2)->get_int() & 31)) <= (31 + 1)); + + ins_cost(INSN_COST * 2); + format %{ "ubfx $dst, $src, $mask" %} + ins_encode %{ + int rshift = $rshift$$constant & 31; + long mask = $mask$$constant; + int width = exact_log2(mask+1); + __ ubfx(as_Register($dst$$reg), + as_Register($src$$reg), rshift, width); + %} + ins_pipe(ialu_reg_shift); +%} + +// Rotations + +define(`EXTRACT_INSN', +`instruct extr$3$1(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI lshift, immI rshift, rFlagsReg cr) +%{ + match(Set dst ($3$1 (LShift$1 src1 lshift) (URShift$1 src2 rshift))); + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & $2)); + + ins_cost(INSN_COST); + format %{ "extr $dst, $src1, $src2, #$rshift" %} + + ins_encode %{ + __ $4(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg), + $rshift$$constant & $2); + %} + ins_pipe(ialu_reg_reg_extr); +%} +')dnl +EXTRACT_INSN(L, 63, Or, extr) +EXTRACT_INSN(I, 31, Or, extrw) +EXTRACT_INSN(L, 63, Add, extr) +EXTRACT_INSN(I, 31, Add, extrw) +define(`ROL_EXPAND', ` +// $2 expander + +instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "$2 $dst, $src, $shift" %} + ins_cost(INSN_COST * 3); + ins_encode %{ + __ subw(rscratch1, zr, as_Register($shift$$reg)); + __ $3(as_Register($dst$$reg), as_Register($src$$reg), + rscratch1); + %} + ins_pipe(ialu_reg_reg_vshift); +%}')dnl +define(`ROR_EXPAND', ` +// $2 expander + +instruct $2$1_rReg(iReg$1NoSp dst, iReg$1 src, iRegI shift, rFlagsReg cr) +%{ + effect(DEF dst, USE src, USE shift); + + format %{ "$2 $dst, $src, $shift" %} + ins_cost(INSN_COST); + ins_encode %{ + __ $3(as_Register($dst$$reg), as_Register($src$$reg), + 
as_Register($shift$$reg)); + %} + ins_pipe(ialu_reg_reg_vshift); +%}')dnl +define(ROL_INSN, ` +instruct $3$1_rReg_Var_C$2(iReg$1NoSp dst, iReg$1 src, iRegI shift, immI$2 c$2, rFlagsReg cr) +%{ + match(Set dst (Or$1 (LShift$1 src shift) (URShift$1 src (SubI c$2 shift)))); + + expand %{ + $3$1_rReg(dst, src, shift, cr); + %} +%}')dnl +define(ROR_INSN, ` +instruct $3$1_rReg_Var_C$2(iReg$1NoSp dst, iReg$1 src, iRegI shift, immI$2 c$2, rFlagsReg cr) +%{ + match(Set dst (Or$1 (URShift$1 src shift) (LShift$1 src (SubI c$2 shift)))); + + expand %{ + $3$1_rReg(dst, src, shift, cr); + %} +%}')dnl +ROL_EXPAND(L, rol, rorv) +ROL_EXPAND(I, rol, rorvw) +ROL_INSN(L, _64, rol) +ROL_INSN(L, 0, rol) +ROL_INSN(I, _32, rol) +ROL_INSN(I, 0, rol) +ROR_EXPAND(L, ror, rorv) +ROR_EXPAND(I, ror, rorvw) +ROR_INSN(L, _64, ror) +ROR_INSN(L, 0, ror) +ROR_INSN(I, _32, ror) +ROR_INSN(I, 0, ror) + +// Add/subtract (extended) +dnl ADD_SUB_EXTENDED(mode, size, add node, shift node, insn, shift type, wordsize +define(`ADD_SUB_CONV', ` +instruct $3Ext$1(iReg$2NoSp dst, iReg$2`'ORL2I($2) src1, iReg$1`'ORL2I($1) src2, rFlagsReg cr) +%{ + match(Set dst ($3$2 src1 (ConvI2L src2))); + ins_cost(INSN_COST); + format %{ "$4 $dst, $src1, $5 $src2" %} + + ins_encode %{ + __ $4(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::$5); + %} + ins_pipe(ialu_reg_reg); +%}')dnl +ADD_SUB_CONV(I,L,Add,add,sxtw); +ADD_SUB_CONV(I,L,Sub,sub,sxtw); +dnl +define(`ADD_SUB_EXTENDED', ` +instruct $3Ext$1_$6(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, immI_`'eval($7-$2) lshift, immI_`'eval($7-$2) rshift, rFlagsReg cr) +%{ + match(Set dst ($3$1 src1 EXTEND($1, $4, src2, lshift, rshift))); + ins_cost(INSN_COST); + format %{ "$5 $dst, $src1, $6 $src2" %} + + ins_encode %{ + __ $5(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::$6); + %} + ins_pipe(ialu_reg_reg); +%}') +ADD_SUB_EXTENDED(I,16,Add,RShift,add,sxth,32) +ADD_SUB_EXTENDED(I,8,Add,RShift,add,sxtb,32) +ADD_SUB_EXTENDED(I,8,Add,URShift,add,uxtb,32) +ADD_SUB_EXTENDED(L,16,Add,RShift,add,sxth,64) +ADD_SUB_EXTENDED(L,32,Add,RShift,add,sxtw,64) +ADD_SUB_EXTENDED(L,8,Add,RShift,add,sxtb,64) +ADD_SUB_EXTENDED(L,8,Add,URShift,add,uxtb,64) +dnl +dnl ADD_SUB_ZERO_EXTEND(mode, size, add node, insn, shift type) +define(`ADD_SUB_ZERO_EXTEND', ` +instruct $3Ext$1_$5_and(iReg$1NoSp dst, iReg$1`'ORL2I($1) src1, iReg$1`'ORL2I($1) src2, imm$1_$2 mask, rFlagsReg cr) +%{ + match(Set dst ($3$1 src1 (And$1 src2 mask))); + ins_cost(INSN_COST); + format %{ "$4 $dst, $src1, $src2, $5" %} + + ins_encode %{ + __ $4(as_Register($dst$$reg), as_Register($src1$$reg), + as_Register($src2$$reg), ext::$5); + %} + ins_pipe(ialu_reg_reg); +%}') +dnl +ADD_SUB_ZERO_EXTEND(I,255,Add,addw,uxtb) +ADD_SUB_ZERO_EXTEND(I,65535,Add,addw,uxth) +ADD_SUB_ZERO_EXTEND(L,255,Add,add,uxtb) +ADD_SUB_ZERO_EXTEND(L,65535,Add,add,uxth) +ADD_SUB_ZERO_EXTEND(L,4294967295,Add,add,uxtw) +dnl +ADD_SUB_ZERO_EXTEND(I,255,Sub,subw,uxtb) +ADD_SUB_ZERO_EXTEND(I,65535,Sub,subw,uxth) +ADD_SUB_ZERO_EXTEND(L,255,Sub,sub,uxtb) +ADD_SUB_ZERO_EXTEND(L,65535,Sub,sub,uxth) +ADD_SUB_ZERO_EXTEND(L,4294967295,Sub,sub,uxtw) + +// END This section of the file is automatically generated. 
Do not edit -------------- --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/ad_encode.m4 2021-01-25 19:31:24.691349874 +0000 @@ -0,0 +1,73 @@ +define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4, + $5, $6, $7, $8);dnl + + %}')dnl +define(access, ` + $3Register $1_reg = as_$3Register($$1$$reg); + $4choose(MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl +define(load,` + enc_class aarch64_enc_$2($1 dst, memory mem) %{dnl +access(dst,$2,$3)')dnl +load(iRegI,ldrsbw) +load(iRegI,ldrsb) +load(iRegI,ldrb) +load(iRegL,ldrb) +load(iRegI,ldrshw) +load(iRegI,ldrsh) +load(iRegI,ldrh) +load(iRegL,ldrh) +load(iRegI,ldrw) +load(iRegL,ldrw) +load(iRegL,ldrsw) +load(iRegL,ldr) +load(vRegF,ldrs,Float) +load(vRegD,ldrd,Float) +define(STORE,` + enc_class aarch64_enc_$2($1 src, memory mem) %{dnl +access(src,$2,$3,$4)')dnl +define(STORE0,` + enc_class aarch64_enc_$2`'0(memory mem) %{ + MacroAssembler _masm(&cbuf); + choose(_masm,zr,$2,$mem->opcode(), + as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl +STORE(iRegI,strb) +STORE0(iRegI,strb) +STORE(iRegI,strh) +STORE0(iRegI,strh) +STORE(iRegI,strw) +STORE0(iRegI,strw) +STORE(iRegL,str,, +`// we sometimes get asked to store the stack pointer into the + // current thread -- we cannot do that directly on AArch64 + if (src_reg == r31_sp) { + MacroAssembler _masm(&cbuf); + assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); + __ mov(rscratch2, sp); + src_reg = rscratch2; + } + ') +STORE0(iRegL,str) +STORE(vRegF,strs,Float) +STORE(vRegD,strd,Float) + + enc_class aarch64_enc_strw_immn(immN src, memory mem) %{ + MacroAssembler _masm(&cbuf); + address con = (address)$src$$constant; + // need to do this the hard way until we can manage relocs + // for 32 bit constants + __ movoop(rscratch2, (jobject)con); + if (con) __ encode_heap_oop_not_null(rscratch2); + choose(_masm,rscratch2,strw,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) + + enc_class aarch64_enc_strw_immnk(immN src, memory mem) %{ + MacroAssembler _masm(&cbuf); + address con = (address)$src$$constant; + // need to do this the hard way until we can manage relocs + // for 32 bit constants + __ movoop(rscratch2, (jobject)con); + __ encode_klass_not_null(rscratch2); + choose(_masm,rscratch2,strw,$mem->opcode(), + as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/assembler_aarch64.cpp 2021-01-25 19:31:25.135354540 +0000 @@ -0,0 +1,1528 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020 Red Hat Inc. All rights reserved. + * reserved. DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE + * HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "interpreter/interpreter.hpp" + +#ifndef PRODUCT +const unsigned long Assembler::asm_bp = 0x00007fffee09ac88; +#endif + +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "immediate_aarch64.hpp" + +// #include "gc_interface/collectedHeap.inline.hpp" +// #include "interpreter/interpreter.hpp" +// #include "memory/cardTableModRefBS.hpp" +// #include "prims/methodHandles.hpp" +// #include "runtime/biasedLocking.hpp" +// #include "runtime/interfaceSupport.hpp" +// #include "runtime/objectMonitor.hpp" +// #include "runtime/os.hpp" +// #include "runtime/sharedRuntime.hpp" +// #include "runtime/stubRoutines.hpp" +// #if INCLUDE_ALL_GCS +// #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +// #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +// #include "gc_implementation/g1/heapRegion.hpp" +// #endif + + +extern "C" void entry(CodeBuffer *cb); + +#define __ _masm. +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + +static float unpack(unsigned value); + +#ifdef ASSERT + +void entry(CodeBuffer *cb) { + + // { + // for (int i = 0; i < 256; i+=16) + // { + // printf("\"%20.20g\", ", unpack(i)); + // printf("\"%20.20g\", ", unpack(i+1)); + // } + // printf("\n"); + // } + + Assembler _masm(cb); + address entry = __ pc(); + + // Smoke test for assembler + +// BEGIN Generated code -- do not edit +// Generated by aarch64-asmtest.py + Label back, forth; + __ bind(back); + +// ArithOp + __ add(r19, r22, r7, Assembler::LSL, 28); // add x19, x22, x7, LSL #28 + __ sub(r16, r11, r10, Assembler::LSR, 13); // sub x16, x11, x10, LSR #13 + __ adds(r27, r13, r28, Assembler::ASR, 2); // adds x27, x13, x28, ASR #2 + __ subs(r20, r28, r26, Assembler::ASR, 41); // subs x20, x28, x26, ASR #41 + __ addw(r8, r19, r19, Assembler::ASR, 19); // add w8, w19, w19, ASR #19 + __ subw(r4, r9, r10, Assembler::LSL, 14); // sub w4, w9, w10, LSL #14 + __ addsw(r8, r11, r30, Assembler::LSL, 13); // adds w8, w11, w30, LSL #13 + __ subsw(r0, r25, r19, Assembler::LSL, 9); // subs w0, w25, w19, LSL #9 + __ andr(r20, r0, r21, Assembler::LSL, 19); // and x20, x0, x21, LSL #19 + __ orr(r21, r14, r20, Assembler::LSL, 17); // orr x21, x14, x20, LSL #17 + __ eor(r25, r28, r1, Assembler::LSL, 51); // eor x25, x28, x1, LSL #51 + __ ands(r10, r27, r11, Assembler::ASR, 15); // ands x10, x27, x11, ASR #15 + __ andw(r25, r5, r12, Assembler::ASR, 23); // and w25, w5, w12, ASR #23 + __ orrw(r18, r14, r10, Assembler::LSR, 4); // orr w18, w14, w10, LSR #4 + __ eorw(r4, r21, r5, Assembler::ASR, 22); // eor w4, w21, w5, ASR #22 + __ andsw(r21, r0, r5, Assembler::ASR, 29); // ands w21, w0, w5, ASR #29 + __ bic(r26, r30, r6, Assembler::ASR, 37); // bic x26, x30, x6, ASR #37 + __ orn(r3, r1, r13, Assembler::LSR, 29); // orn x3, x1, x13, LSR #29 + __ eon(r0, r28, r9, 
Assembler::LSL, 47); // eon x0, x28, x9, LSL #47 + __ bics(r29, r5, r28, Assembler::LSL, 46); // bics x29, x5, x28, LSL #46 + __ bicw(r9, r18, r7, Assembler::LSR, 20); // bic w9, w18, w7, LSR #20 + __ ornw(r26, r13, r25, Assembler::ASR, 24); // orn w26, w13, w25, ASR #24 + __ eonw(r25, r4, r19, Assembler::LSL, 6); // eon w25, w4, w19, LSL #6 + __ bicsw(r5, r26, r4, Assembler::LSR, 24); // bics w5, w26, w4, LSR #24 + +// AddSubImmOp + __ addw(r7, r19, 340u); // add w7, w19, #340 + __ addsw(r8, r0, 401u); // adds w8, w0, #401 + __ subw(r29, r20, 163u); // sub w29, w20, #163 + __ subsw(r8, r23, 759u); // subs w8, w23, #759 + __ add(r1, r12, 523u); // add x1, x12, #523 + __ adds(r2, r11, 426u); // adds x2, x11, #426 + __ sub(r14, r29, 716u); // sub x14, x29, #716 + __ subs(r11, r5, 582u); // subs x11, x5, #582 + +// LogicalImmOp + __ andw(r23, r22, 32768ul); // and w23, w22, #0x8000 + __ orrw(r4, r10, 4042322160ul); // orr w4, w10, #0xf0f0f0f0 + __ eorw(r0, r24, 4042322160ul); // eor w0, w24, #0xf0f0f0f0 + __ andsw(r19, r29, 2139127680ul); // ands w19, w29, #0x7f807f80 + __ andr(r5, r10, 4503599627354112ul); // and x5, x10, #0xfffffffffc000 + __ orr(r12, r30, 18445618178097414144ul); // orr x12, x30, #0xfffc0000fffc0000 + __ eor(r30, r5, 262128ul); // eor x30, x5, #0x3fff0 + __ ands(r26, r23, 4194300ul); // ands x26, x23, #0x3ffffc + +// AbsOp + __ b(__ pc()); // b . + __ b(back); // b back + __ b(forth); // b forth + __ bl(__ pc()); // bl . + __ bl(back); // bl back + __ bl(forth); // bl forth + +// RegAndAbsOp + __ cbzw(r12, __ pc()); // cbz w12, . + __ cbzw(r12, back); // cbz w12, back + __ cbzw(r12, forth); // cbz w12, forth + __ cbnzw(r20, __ pc()); // cbnz w20, . + __ cbnzw(r20, back); // cbnz w20, back + __ cbnzw(r20, forth); // cbnz w20, forth + __ cbz(r12, __ pc()); // cbz x12, . + __ cbz(r12, back); // cbz x12, back + __ cbz(r12, forth); // cbz x12, forth + __ cbnz(r24, __ pc()); // cbnz x24, . + __ cbnz(r24, back); // cbnz x24, back + __ cbnz(r24, forth); // cbnz x24, forth + __ adr(r6, __ pc()); // adr x6, . + __ adr(r6, back); // adr x6, back + __ adr(r6, forth); // adr x6, forth + __ _adrp(r21, __ pc()); // adrp x21, . + +// RegImmAbsOp + __ tbz(r1, 1, __ pc()); // tbz x1, #1, . + __ tbz(r1, 1, back); // tbz x1, #1, back + __ tbz(r1, 1, forth); // tbz x1, #1, forth + __ tbnz(r8, 9, __ pc()); // tbnz x8, #9, . + __ tbnz(r8, 9, back); // tbnz x8, #9, back + __ tbnz(r8, 9, forth); // tbnz x8, #9, forth + +// MoveWideImmOp + __ movnw(r12, 23175, 0); // movn w12, #23175, lsl 0 + __ movzw(r11, 20476, 16); // movz w11, #20476, lsl 16 + __ movkw(r21, 3716, 0); // movk w21, #3716, lsl 0 + __ movn(r29, 28661, 48); // movn x29, #28661, lsl 48 + __ movz(r3, 6927, 0); // movz x3, #6927, lsl 0 + __ movk(r22, 9828, 16); // movk x22, #9828, lsl 16 + +// BitfieldOp + __ sbfm(r12, r8, 6, 22); // sbfm x12, x8, #6, #22 + __ bfmw(r19, r25, 25, 19); // bfm w19, w25, #25, #19 + __ ubfmw(r9, r12, 29, 15); // ubfm w9, w12, #29, #15 + __ sbfm(r28, r25, 16, 16); // sbfm x28, x25, #16, #16 + __ bfm(r12, r5, 4, 25); // bfm x12, x5, #4, #25 + __ ubfm(r0, r10, 6, 8); // ubfm x0, x10, #6, #8 + +// ExtractOp + __ extrw(r4, r13, r26, 24); // extr w4, w13, w26, #24 + __ extr(r23, r30, r24, 31); // extr x23, x30, x24, #31 + +// CondBranchOp + __ br(Assembler::EQ, __ pc()); // b.EQ . + __ br(Assembler::EQ, back); // b.EQ back + __ br(Assembler::EQ, forth); // b.EQ forth + __ br(Assembler::NE, __ pc()); // b.NE . 
+ __ br(Assembler::NE, back); // b.NE back + __ br(Assembler::NE, forth); // b.NE forth + __ br(Assembler::HS, __ pc()); // b.HS . + __ br(Assembler::HS, back); // b.HS back + __ br(Assembler::HS, forth); // b.HS forth + __ br(Assembler::CS, __ pc()); // b.CS . + __ br(Assembler::CS, back); // b.CS back + __ br(Assembler::CS, forth); // b.CS forth + __ br(Assembler::LO, __ pc()); // b.LO . + __ br(Assembler::LO, back); // b.LO back + __ br(Assembler::LO, forth); // b.LO forth + __ br(Assembler::CC, __ pc()); // b.CC . + __ br(Assembler::CC, back); // b.CC back + __ br(Assembler::CC, forth); // b.CC forth + __ br(Assembler::MI, __ pc()); // b.MI . + __ br(Assembler::MI, back); // b.MI back + __ br(Assembler::MI, forth); // b.MI forth + __ br(Assembler::PL, __ pc()); // b.PL . + __ br(Assembler::PL, back); // b.PL back + __ br(Assembler::PL, forth); // b.PL forth + __ br(Assembler::VS, __ pc()); // b.VS . + __ br(Assembler::VS, back); // b.VS back + __ br(Assembler::VS, forth); // b.VS forth + __ br(Assembler::VC, __ pc()); // b.VC . + __ br(Assembler::VC, back); // b.VC back + __ br(Assembler::VC, forth); // b.VC forth + __ br(Assembler::HI, __ pc()); // b.HI . + __ br(Assembler::HI, back); // b.HI back + __ br(Assembler::HI, forth); // b.HI forth + __ br(Assembler::LS, __ pc()); // b.LS . + __ br(Assembler::LS, back); // b.LS back + __ br(Assembler::LS, forth); // b.LS forth + __ br(Assembler::GE, __ pc()); // b.GE . + __ br(Assembler::GE, back); // b.GE back + __ br(Assembler::GE, forth); // b.GE forth + __ br(Assembler::LT, __ pc()); // b.LT . + __ br(Assembler::LT, back); // b.LT back + __ br(Assembler::LT, forth); // b.LT forth + __ br(Assembler::GT, __ pc()); // b.GT . + __ br(Assembler::GT, back); // b.GT back + __ br(Assembler::GT, forth); // b.GT forth + __ br(Assembler::LE, __ pc()); // b.LE . + __ br(Assembler::LE, back); // b.LE back + __ br(Assembler::LE, forth); // b.LE forth + __ br(Assembler::AL, __ pc()); // b.AL . + __ br(Assembler::AL, back); // b.AL back + __ br(Assembler::AL, forth); // b.AL forth + __ br(Assembler::NV, __ pc()); // b.NV . 
+ __ br(Assembler::NV, back); // b.NV back + __ br(Assembler::NV, forth); // b.NV forth + +// ImmOp + __ svc(12729); // svc #12729 + __ hvc(6788); // hvc #6788 + __ smc(1535); // smc #1535 + __ brk(16766); // brk #16766 + __ hlt(9753); // hlt #9753 + +// Op + __ nop(); // nop + __ eret(); // eret + __ drps(); // drps + __ isb(); // isb + +// SystemOp + __ dsb(Assembler::SY); // dsb SY + __ dmb(Assembler::ISHST); // dmb ISHST + +// OneRegOp + __ br(r2); // br x2 + __ blr(r5); // blr x5 + +// LoadStoreExclusiveOp + __ stxr(r20, r21, r2); // stxr w20, x21, [x2] + __ stlxr(r5, r29, r7); // stlxr w5, x29, [x7] + __ ldxr(r5, r16); // ldxr x5, [x16] + __ ldaxr(r27, r29); // ldaxr x27, [x29] + __ stlr(r0, r29); // stlr x0, [x29] + __ ldar(r21, r28); // ldar x21, [x28] + +// LoadStoreExclusiveOp + __ stxrw(r21, r24, r7); // stxr w21, w24, [x7] + __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28] + __ ldxrw(r21, r6); // ldxr w21, [x6] + __ ldaxrw(r15, r30); // ldaxr w15, [x30] + __ stlrw(r19, r3); // stlr w19, [x3] + __ ldarw(r22, r2); // ldar w22, [x2] + +// LoadStoreExclusiveOp + __ stxrh(r18, r15, r0); // stxrh w18, w15, [x0] + __ stlxrh(r11, r5, r28); // stlxrh w11, w5, [x28] + __ ldxrh(r29, r6); // ldxrh w29, [x6] + __ ldaxrh(r18, r7); // ldaxrh w18, [x7] + __ stlrh(r25, r28); // stlrh w25, [x28] + __ ldarh(r2, r19); // ldarh w2, [x19] + +// LoadStoreExclusiveOp + __ stxrb(r10, r30, r1); // stxrb w10, w30, [x1] + __ stlxrb(r20, r21, r22); // stlxrb w20, w21, [x22] + __ ldxrb(r25, r2); // ldxrb w25, [x2] + __ ldaxrb(r24, r5); // ldaxrb w24, [x5] + __ stlrb(r16, r3); // stlrb w16, [x3] + __ ldarb(r22, r29); // ldarb w22, [x29] + +// LoadStoreExclusiveOp + __ ldxp(r8, r2, r19); // ldxp x8, x2, [x19] + __ ldaxp(r7, r19, r14); // ldaxp x7, x19, [x14] + __ stxp(r8, r27, r28, r5); // stxp w8, x27, x28, [x5] + __ stlxp(r5, r8, r14, r6); // stlxp w5, x8, x14, [x6] + +// LoadStoreExclusiveOp + __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] + __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15] + __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10] + __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18] + +// base_plus_unscaled_offset +// LoadStoreOp + __ str(r30, Address(r11, 99)); // str x30, [x11, 99] + __ strw(r23, Address(r25, -77)); // str w23, [x25, -77] + __ strb(r2, Address(r14, 3)); // strb w2, [x14, 3] + __ strh(r9, Address(r10, 5)); // strh w9, [x10, 5] + __ ldr(r20, Address(r15, 57)); // ldr x20, [x15, 57] + __ ldrw(r12, Address(r16, -78)); // ldr w12, [x16, -78] + __ ldrb(r22, Address(r26, -3)); // ldrb w22, [x26, -3] + __ ldrh(r30, Address(r19, -47)); // ldrh w30, [x19, -47] + __ ldrsb(r9, Address(r10, -12)); // ldrsb x9, [x10, -12] + __ ldrsh(r28, Address(r17, 14)); // ldrsh x28, [x17, 14] + __ ldrshw(r3, Address(r5, 10)); // ldrsh w3, [x5, 10] + __ ldrsw(r17, Address(r17, -91)); // ldrsw x17, [x17, -91] + __ ldrd(v2, Address(r20, -17)); // ldr d2, [x20, -17] + __ ldrs(v22, Address(r7, -10)); // ldr s22, [x7, -10] + __ strd(v30, Address(r18, -223)); // str d30, [x18, -223] + __ strs(v13, Address(r22, 21)); // str s13, [x22, 21] + +// pre +// LoadStoreOp + __ str(r9, Address(__ pre(r18, -112))); // str x9, [x18, -112]! + __ strw(r29, Address(__ pre(r23, 11))); // str w29, [x23, 11]! + __ strb(r18, Address(__ pre(r12, -1))); // strb w18, [x12, -1]! + __ strh(r16, Address(__ pre(r20, -23))); // strh w16, [x20, -23]! + __ ldr(r3, Address(__ pre(r29, 9))); // ldr x3, [x29, 9]! + __ ldrw(r25, Address(__ pre(r3, 19))); // ldr w25, [x3, 19]! 
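+    // Address(__ pre(base, offset)) and Address(__ post(base, offset)) select the
+    // pre- and post-indexed addressing modes, which write the updated address
+    // back to the base register.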
+ __ ldrb(r1, Address(__ pre(r29, -1))); // ldrb w1, [x29, -1]! + __ ldrh(r8, Address(__ pre(r29, -57))); // ldrh w8, [x29, -57]! + __ ldrsb(r5, Address(__ pre(r14, -13))); // ldrsb x5, [x14, -13]! + __ ldrsh(r10, Address(__ pre(r27, 1))); // ldrsh x10, [x27, 1]! + __ ldrshw(r11, Address(__ pre(r10, 25))); // ldrsh w11, [x10, 25]! + __ ldrsw(r4, Address(__ pre(r22, -92))); // ldrsw x4, [x22, -92]! + __ ldrd(v11, Address(__ pre(r23, 8))); // ldr d11, [x23, 8]! + __ ldrs(v25, Address(__ pre(r19, 54))); // ldr s25, [x19, 54]! + __ strd(v1, Address(__ pre(r7, -174))); // str d1, [x7, -174]! + __ strs(v8, Address(__ pre(r25, 54))); // str s8, [x25, 54]! + +// post +// LoadStoreOp + __ str(r5, Address(__ post(r11, 37))); // str x5, [x11], 37 + __ strw(r24, Address(__ post(r15, 19))); // str w24, [x15], 19 + __ strb(r15, Address(__ post(r26, -1))); // strb w15, [x26], -1 + __ strh(r18, Address(__ post(r18, -6))); // strh w18, [x18], -6 + __ ldr(r7, Address(__ post(r2, -230))); // ldr x7, [x2], -230 + __ ldrw(r27, Address(__ post(r11, -27))); // ldr w27, [x11], -27 + __ ldrb(r18, Address(__ post(r3, -25))); // ldrb w18, [x3], -25 + __ ldrh(r10, Address(__ post(r24, -32))); // ldrh w10, [x24], -32 + __ ldrsb(r22, Address(__ post(r10, 4))); // ldrsb x22, [x10], 4 + __ ldrsh(r17, Address(__ post(r12, 25))); // ldrsh x17, [x12], 25 + __ ldrshw(r8, Address(__ post(r7, -62))); // ldrsh w8, [x7], -62 + __ ldrsw(r23, Address(__ post(r22, -51))); // ldrsw x23, [x22], -51 + __ ldrd(v24, Address(__ post(r25, 48))); // ldr d24, [x25], 48 + __ ldrs(v21, Address(__ post(r12, -10))); // ldr s21, [x12], -10 + __ strd(v18, Address(__ post(r13, -222))); // str d18, [x13], -222 + __ strs(v16, Address(__ post(r1, -41))); // str s16, [x1], -41 + +// base_plus_reg +// LoadStoreOp + __ str(r2, Address(r22, r15, Address::sxtw(0))); // str x2, [x22, w15, sxtw #0] + __ strw(r2, Address(r16, r29, Address::lsl(0))); // str w2, [x16, x29, lsl #0] + __ strb(r20, Address(r18, r14, Address::uxtw(0))); // strb w20, [x18, w14, uxtw #0] + __ strh(r6, Address(r19, r20, Address::sxtx(1))); // strh w6, [x19, x20, sxtx #1] + __ ldr(r14, Address(r29, r14, Address::sxtw(0))); // ldr x14, [x29, w14, sxtw #0] + __ ldrw(r16, Address(r20, r12, Address::sxtw(2))); // ldr w16, [x20, w12, sxtw #2] + __ ldrb(r9, Address(r12, r0, Address::sxtw(0))); // ldrb w9, [x12, w0, sxtw #0] + __ ldrh(r12, Address(r17, r3, Address::lsl(1))); // ldrh w12, [x17, x3, lsl #1] + __ ldrsb(r2, Address(r17, r3, Address::sxtx(0))); // ldrsb x2, [x17, x3, sxtx #0] + __ ldrsh(r7, Address(r1, r17, Address::uxtw(1))); // ldrsh x7, [x1, w17, uxtw #1] + __ ldrshw(r25, Address(r15, r18, Address::sxtw(1))); // ldrsh w25, [x15, w18, sxtw #1] + __ ldrsw(r23, Address(r21, r12, Address::lsl(0))); // ldrsw x23, [x21, x12, lsl #0] + __ ldrd(v5, Address(r13, r8, Address::lsl(3))); // ldr d5, [x13, x8, lsl #3] + __ ldrs(v3, Address(r10, r22, Address::lsl(2))); // ldr s3, [x10, x22, lsl #2] + __ strd(v14, Address(r2, r27, Address::sxtw(0))); // str d14, [x2, w27, sxtw #0] + __ strs(v20, Address(r6, r25, Address::lsl(0))); // str s20, [x6, x25, lsl #0] + +// base_plus_scaled_offset +// LoadStoreOp + __ str(r30, Address(r7, 16256)); // str x30, [x7, 16256] + __ strw(r15, Address(r8, 7588)); // str w15, [x8, 7588] + __ strb(r11, Address(r0, 1866)); // strb w11, [x0, 1866] + __ strh(r3, Address(r17, 3734)); // strh w3, [x17, 3734] + __ ldr(r2, Address(r7, 14224)); // ldr x2, [x7, 14224] + __ ldrw(r5, Address(r9, 7396)); // ldr w5, [x9, 7396] + __ ldrb(r28, Address(r9, 1721)); // ldrb 
w28, [x9, 1721] + __ ldrh(r2, Address(r20, 3656)); // ldrh w2, [x20, 3656] + __ ldrsb(r22, Address(r14, 1887)); // ldrsb x22, [x14, 1887] + __ ldrsh(r8, Address(r0, 4080)); // ldrsh x8, [x0, 4080] + __ ldrshw(r0, Address(r30, 3916)); // ldrsh w0, [x30, 3916] + __ ldrsw(r24, Address(r19, 6828)); // ldrsw x24, [x19, 6828] + __ ldrd(v24, Address(r12, 13032)); // ldr d24, [x12, 13032] + __ ldrs(v8, Address(r8, 7452)); // ldr s8, [x8, 7452] + __ strd(v10, Address(r15, 15992)); // str d10, [x15, 15992] + __ strs(v26, Address(r19, 6688)); // str s26, [x19, 6688] + +// pcrel +// LoadStoreOp + __ ldr(r10, forth); // ldr x10, forth + __ ldrw(r3, __ pc()); // ldr w3, . + +// LoadStoreOp + __ prfm(Address(r23, 9)); // prfm PLDL1KEEP, [x23, 9] + +// LoadStoreOp + __ prfm(back); // prfm PLDL1KEEP, back + +// LoadStoreOp + __ prfm(Address(r3, r8, Address::uxtw(0))); // prfm PLDL1KEEP, [x3, w8, uxtw #0] + +// LoadStoreOp + __ prfm(Address(r11, 15080)); // prfm PLDL1KEEP, [x11, 15080] + +// AddSubCarryOp + __ adcw(r13, r9, r28); // adc w13, w9, w28 + __ adcsw(r27, r19, r28); // adcs w27, w19, w28 + __ sbcw(r19, r18, r6); // sbc w19, w18, w6 + __ sbcsw(r14, r20, r3); // sbcs w14, w20, w3 + __ adc(r16, r14, r8); // adc x16, x14, x8 + __ adcs(r0, r29, r8); // adcs x0, x29, x8 + __ sbc(r8, r24, r20); // sbc x8, x24, x20 + __ sbcs(r12, r28, r0); // sbcs x12, x28, x0 + +// AddSubExtendedOp + __ addw(r23, r6, r16, ext::uxtb, 4); // add w23, w6, w16, uxtb #4 + __ addsw(r25, r25, r23, ext::sxth, 2); // adds w25, w25, w23, sxth #2 + __ sub(r26, r22, r4, ext::uxtx, 1); // sub x26, x22, x4, uxtx #1 + __ subsw(r17, r29, r19, ext::sxtx, 3); // subs w17, w29, w19, sxtx #3 + __ add(r11, r30, r21, ext::uxtb, 3); // add x11, x30, x21, uxtb #3 + __ adds(r16, r19, r0, ext::sxtb, 2); // adds x16, x19, x0, sxtb #2 + __ sub(r11, r9, r25, ext::sxtx, 1); // sub x11, x9, x25, sxtx #1 + __ subs(r17, r20, r12, ext::sxtb, 4); // subs x17, x20, x12, sxtb #4 + +// ConditionalCompareOp + __ ccmnw(r13, r11, 3u, Assembler::LE); // ccmn w13, w11, #3, LE + __ ccmpw(r13, r12, 2u, Assembler::HI); // ccmp w13, w12, #2, HI + __ ccmn(r3, r2, 12u, Assembler::NE); // ccmn x3, x2, #12, NE + __ ccmp(r7, r21, 3u, Assembler::VS); // ccmp x7, x21, #3, VS + +// ConditionalCompareImmedOp + __ ccmnw(r2, 14, 4, Assembler::CC); // ccmn w2, #14, #4, CC + __ ccmpw(r17, 17, 6, Assembler::PL); // ccmp w17, #17, #6, PL + __ ccmn(r10, 12, 0, Assembler::CS); // ccmn x10, #12, #0, CS + __ ccmp(r21, 18, 14, Assembler::GE); // ccmp x21, #18, #14, GE + +// ConditionalSelectOp + __ cselw(r21, r13, r12, Assembler::GT); // csel w21, w13, w12, GT + __ csincw(r10, r27, r15, Assembler::LS); // csinc w10, w27, w15, LS + __ csinvw(r0, r13, r9, Assembler::HI); // csinv w0, w13, w9, HI + __ csnegw(r18, r4, r26, Assembler::VS); // csneg w18, w4, w26, VS + __ csel(r12, r29, r7, Assembler::LS); // csel x12, x29, x7, LS + __ csinc(r6, r7, r20, Assembler::VC); // csinc x6, x7, x20, VC + __ csinv(r22, r21, r3, Assembler::LE); // csinv x22, x21, x3, LE + __ csneg(r19, r12, r27, Assembler::LS); // csneg x19, x12, x27, LS + +// TwoRegOp + __ rbitw(r0, r16); // rbit w0, w16 + __ rev16w(r17, r23); // rev16 w17, w23 + __ revw(r17, r14); // rev w17, w14 + __ clzw(r24, r30); // clz w24, w30 + __ clsw(r24, r22); // cls w24, w22 + __ rbit(r3, r17); // rbit x3, x17 + __ rev16(r12, r13); // rev16 x12, x13 + __ rev32(r9, r22); // rev32 x9, x22 + __ rev(r0, r0); // rev x0, x0 + __ clz(r5, r16); // clz x5, x16 + __ cls(r25, r22); // cls x25, x22 + +// ThreeRegOp + __ udivw(r29, r4, r0); // udiv w29, 
w4, w0 + __ sdivw(r0, r29, r29); // sdiv w0, w29, w29 + __ lslvw(r5, r17, r21); // lslv w5, w17, w21 + __ lsrvw(r9, r9, r18); // lsrv w9, w9, w18 + __ asrvw(r1, r27, r8); // asrv w1, w27, w8 + __ rorvw(r18, r20, r13); // rorv w18, w20, w13 + __ udiv(r8, r25, r12); // udiv x8, x25, x12 + __ sdiv(r7, r5, r28); // sdiv x7, x5, x28 + __ lslv(r5, r17, r27); // lslv x5, x17, x27 + __ lsrv(r23, r26, r20); // lsrv x23, x26, x20 + __ asrv(r28, r8, r28); // asrv x28, x8, x28 + __ rorv(r3, r29, r4); // rorv x3, x29, x4 + +// FourRegMulOp + __ maddw(r17, r14, r26, r21); // madd w17, w14, w26, w21 + __ msubw(r1, r30, r11, r11); // msub w1, w30, w11, w11 + __ madd(r1, r17, r6, r28); // madd x1, x17, x6, x28 + __ msub(r30, r6, r30, r8); // msub x30, x6, x30, x8 + __ smaddl(r21, r6, r14, r8); // smaddl x21, w6, w14, x8 + __ smsubl(r10, r10, r24, r19); // smsubl x10, w10, w24, x19 + __ umaddl(r20, r18, r14, r24); // umaddl x20, w18, w14, x24 + __ umsubl(r18, r2, r5, r5); // umsubl x18, w2, w5, x5 + +// ThreeRegFloatOp + __ fmuls(v8, v18, v13); // fmul s8, s18, s13 + __ fdivs(v2, v14, v28); // fdiv s2, s14, s28 + __ fadds(v15, v12, v28); // fadd s15, s12, s28 + __ fsubs(v0, v12, v1); // fsub s0, s12, s1 + __ fmuls(v15, v29, v4); // fmul s15, s29, s4 + __ fmuld(v12, v1, v23); // fmul d12, d1, d23 + __ fdivd(v27, v8, v18); // fdiv d27, d8, d18 + __ faddd(v23, v20, v11); // fadd d23, d20, d11 + __ fsubd(v8, v12, v18); // fsub d8, d12, d18 + __ fmuld(v26, v24, v23); // fmul d26, d24, d23 + +// FourRegFloatOp + __ fmadds(v21, v23, v13, v25); // fmadd s21, s23, s13, s25 + __ fmsubs(v22, v10, v1, v14); // fmsub s22, s10, s1, s14 + __ fnmadds(v14, v20, v2, v30); // fnmadd s14, s20, s2, s30 + __ fnmadds(v7, v29, v22, v22); // fnmadd s7, s29, s22, s22 + __ fmaddd(v13, v5, v15, v5); // fmadd d13, d5, d15, d5 + __ fmsubd(v14, v12, v5, v10); // fmsub d14, d12, d5, d10 + __ fnmaddd(v10, v19, v0, v1); // fnmadd d10, d19, d0, d1 + __ fnmaddd(v20, v2, v2, v0); // fnmadd d20, d2, d2, d0 + +// TwoRegFloatOp + __ fmovs(v25, v9); // fmov s25, s9 + __ fabss(v20, v4); // fabs s20, s4 + __ fnegs(v3, v27); // fneg s3, s27 + __ fsqrts(v1, v2); // fsqrt s1, s2 + __ fcvts(v30, v0); // fcvt d30, s0 + __ fmovd(v12, v4); // fmov d12, d4 + __ fabsd(v1, v27); // fabs d1, d27 + __ fnegd(v8, v22); // fneg d8, d22 + __ fsqrtd(v11, v11); // fsqrt d11, d11 + __ fcvtd(v22, v28); // fcvt s22, d28 + +// FloatConvertOp + __ fcvtzsw(r28, v22); // fcvtzs w28, s22 + __ fcvtzs(r20, v27); // fcvtzs x20, s27 + __ fcvtzdw(r14, v0); // fcvtzs w14, d0 + __ fcvtzd(r26, v11); // fcvtzs x26, d11 + __ scvtfws(v28, r22); // scvtf s28, w22 + __ scvtfs(v16, r10); // scvtf s16, x10 + __ scvtfwd(v8, r21); // scvtf d8, w21 + __ scvtfd(v21, r28); // scvtf d21, x28 + __ fmovs(r24, v24); // fmov w24, s24 + __ fmovd(r8, v19); // fmov x8, d19 + __ fmovs(v8, r12); // fmov s8, w12 + __ fmovd(v6, r7); // fmov d6, x7 + +// TwoRegFloatOp + __ fcmps(v30, v16); // fcmp s30, s16 + __ fcmpd(v25, v11); // fcmp d25, d11 + __ fcmps(v11, 0.0); // fcmp s11, #0.0 + __ fcmpd(v11, 0.0); // fcmp d11, #0.0 + +// LoadStorePairOp + __ stpw(r29, r12, Address(r17, 128)); // stp w29, w12, [x17, #128] + __ ldpw(r22, r18, Address(r14, -96)); // ldp w22, w18, [x14, #-96] + __ ldpsw(r11, r16, Address(r1, 64)); // ldpsw x11, x16, [x1, #64] + __ stp(r0, r11, Address(r26, 112)); // stp x0, x11, [x26, #112] + __ ldp(r7, r1, Address(r26, 16)); // ldp x7, x1, [x26, #16] + +// LoadStorePairOp + __ stpw(r10, r7, Address(__ pre(r24, 0))); // stp w10, w7, [x24, #0]! 
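+    // ldp/stp move a pair of registers with one instruction; the same
+    // signed-offset, pre-indexed and post-indexed forms as the scalar loads
+    // and stores above are exercised here.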
+ __ ldpw(r7, r28, Address(__ pre(r24, -256))); // ldp w7, w28, [x24, #-256]! + __ ldpsw(r25, r28, Address(__ pre(r21, -240))); // ldpsw x25, x28, [x21, #-240]! + __ stp(r20, r18, Address(__ pre(r14, -16))); // stp x20, x18, [x14, #-16]! + __ ldp(r8, r10, Address(__ pre(r13, 80))); // ldp x8, x10, [x13, #80]! + +// LoadStorePairOp + __ stpw(r26, r24, Address(__ post(r2, -128))); // stp w26, w24, [x2], #-128 + __ ldpw(r2, r25, Address(__ post(r21, -192))); // ldp w2, w25, [x21], #-192 + __ ldpsw(r17, r2, Address(__ post(r21, -144))); // ldpsw x17, x2, [x21], #-144 + __ stp(r12, r10, Address(__ post(r11, 96))); // stp x12, x10, [x11], #96 + __ ldp(r24, r6, Address(__ post(r17, -32))); // ldp x24, x6, [x17], #-32 + +// LoadStorePairOp + __ stnpw(r3, r30, Address(r14, -224)); // stnp w3, w30, [x14, #-224] + __ ldnpw(r15, r20, Address(r26, -144)); // ldnp w15, w20, [x26, #-144] + __ stnp(r22, r25, Address(r12, -128)); // stnp x22, x25, [x12, #-128] + __ ldnp(r27, r22, Address(r17, -176)); // ldnp x27, x22, [x17, #-176] + +// FloatImmediateOp + __ fmovd(v0, 2.0); // fmov d0, #2.0 + __ fmovd(v0, 2.125); // fmov d0, #2.125 + __ fmovd(v0, 4.0); // fmov d0, #4.0 + __ fmovd(v0, 4.25); // fmov d0, #4.25 + __ fmovd(v0, 8.0); // fmov d0, #8.0 + __ fmovd(v0, 8.5); // fmov d0, #8.5 + __ fmovd(v0, 16.0); // fmov d0, #16.0 + __ fmovd(v0, 17.0); // fmov d0, #17.0 + __ fmovd(v0, 0.125); // fmov d0, #0.125 + __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125 + __ fmovd(v0, 0.25); // fmov d0, #0.25 + __ fmovd(v0, 0.265625); // fmov d0, #0.265625 + __ fmovd(v0, 0.5); // fmov d0, #0.5 + __ fmovd(v0, 0.53125); // fmov d0, #0.53125 + __ fmovd(v0, 1.0); // fmov d0, #1.0 + __ fmovd(v0, 1.0625); // fmov d0, #1.0625 + __ fmovd(v0, -2.0); // fmov d0, #-2.0 + __ fmovd(v0, -2.125); // fmov d0, #-2.125 + __ fmovd(v0, -4.0); // fmov d0, #-4.0 + __ fmovd(v0, -4.25); // fmov d0, #-4.25 + __ fmovd(v0, -8.0); // fmov d0, #-8.0 + __ fmovd(v0, -8.5); // fmov d0, #-8.5 + __ fmovd(v0, -16.0); // fmov d0, #-16.0 + __ fmovd(v0, -17.0); // fmov d0, #-17.0 + __ fmovd(v0, -0.125); // fmov d0, #-0.125 + __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125 + __ fmovd(v0, -0.25); // fmov d0, #-0.25 + __ fmovd(v0, -0.265625); // fmov d0, #-0.265625 + __ fmovd(v0, -0.5); // fmov d0, #-0.5 + __ fmovd(v0, -0.53125); // fmov d0, #-0.53125 + __ fmovd(v0, -1.0); // fmov d0, #-1.0 + __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 + + __ bind(forth); + +/* +aarch64ops.o: file format elf64-littleaarch64 + + +Disassembly of section .text: + +0000000000000000 : + 0: 8b0772d3 add x19, x22, x7, lsl #28 + 4: cb4a3570 sub x16, x11, x10, lsr #13 + 8: ab9c09bb adds x27, x13, x28, asr #2 + c: eb9aa794 subs x20, x28, x26, asr #41 + 10: 0b934e68 add w8, w19, w19, asr #19 + 14: 4b0a3924 sub w4, w9, w10, lsl #14 + 18: 2b1e3568 adds w8, w11, w30, lsl #13 + 1c: 6b132720 subs w0, w25, w19, lsl #9 + 20: 8a154c14 and x20, x0, x21, lsl #19 + 24: aa1445d5 orr x21, x14, x20, lsl #17 + 28: ca01cf99 eor x25, x28, x1, lsl #51 + 2c: ea8b3f6a ands x10, x27, x11, asr #15 + 30: 0a8c5cb9 and w25, w5, w12, asr #23 + 34: 2a4a11d2 orr w18, w14, w10, lsr #4 + 38: 4a855aa4 eor w4, w21, w5, asr #22 + 3c: 6a857415 ands w21, w0, w5, asr #29 + 40: 8aa697da bic x26, x30, x6, asr #37 + 44: aa6d7423 orn x3, x1, x13, lsr #29 + 48: ca29bf80 eon x0, x28, x9, lsl #47 + 4c: ea3cb8bd bics x29, x5, x28, lsl #46 + 50: 0a675249 bic w9, w18, w7, lsr #20 + 54: 2ab961ba orn w26, w13, w25, asr #24 + 58: 4a331899 eon w25, w4, w19, lsl #6 + 5c: 6a646345 bics w5, w26, w4, lsr #24 + 60: 11055267 add w7, w19, #0x154 
+ 64: 31064408 adds w8, w0, #0x191 + 68: 51028e9d sub w29, w20, #0xa3 + 6c: 710bdee8 subs w8, w23, #0x2f7 + 70: 91082d81 add x1, x12, #0x20b + 74: b106a962 adds x2, x11, #0x1aa + 78: d10b33ae sub x14, x29, #0x2cc + 7c: f10918ab subs x11, x5, #0x246 + 80: 121102d7 and w23, w22, #0x8000 + 84: 3204cd44 orr w4, w10, #0xf0f0f0f0 + 88: 5204cf00 eor w0, w24, #0xf0f0f0f0 + 8c: 72099fb3 ands w19, w29, #0x7f807f80 + 90: 92729545 and x5, x10, #0xfffffffffc000 + 94: b20e37cc orr x12, x30, #0xfffc0000fffc0000 + 98: d27c34be eor x30, x5, #0x3fff0 + 9c: f27e4efa ands x26, x23, #0x3ffffc + a0: 14000000 b a0 + a4: 17ffffd7 b 0 + a8: 1400017f b 6a4 + ac: 94000000 bl ac + b0: 97ffffd4 bl 0 + b4: 9400017c bl 6a4 + b8: 3400000c cbz w12, b8 + bc: 34fffa2c cbz w12, 0 + c0: 34002f2c cbz w12, 6a4 + c4: 35000014 cbnz w20, c4 + c8: 35fff9d4 cbnz w20, 0 + cc: 35002ed4 cbnz w20, 6a4 + d0: b400000c cbz x12, d0 + d4: b4fff96c cbz x12, 0 + d8: b4002e6c cbz x12, 6a4 + dc: b5000018 cbnz x24, dc + e0: b5fff918 cbnz x24, 0 + e4: b5002e18 cbnz x24, 6a4 + e8: 10000006 adr x6, e8 + ec: 10fff8a6 adr x6, 0 + f0: 10002da6 adr x6, 6a4 + f4: 90000015 adrp x21, 0 + f8: 36080001 tbz w1, #1, f8 + fc: 360ff821 tbz w1, #1, 0 + 100: 36082d21 tbz w1, #1, 6a4 + 104: 37480008 tbnz w8, #9, 104 + 108: 374ff7c8 tbnz w8, #9, 0 + 10c: 37482cc8 tbnz w8, #9, 6a4 + 110: 128b50ec movn w12, #0x5a87 + 114: 52a9ff8b movz w11, #0x4ffc, lsl #16 + 118: 7281d095 movk w21, #0xe84 + 11c: 92edfebd movn x29, #0x6ff5, lsl #48 + 120: d28361e3 movz x3, #0x1b0f + 124: f2a4cc96 movk x22, #0x2664, lsl #16 + 128: 9346590c sbfx x12, x8, #6, #17 + 12c: 33194f33 bfi w19, w25, #7, #20 + 130: 531d3d89 ubfiz w9, w12, #3, #16 + 134: 9350433c sbfx x28, x25, #16, #1 + 138: b34464ac bfxil x12, x5, #4, #22 + 13c: d3462140 ubfx x0, x10, #6, #3 + 140: 139a61a4 extr w4, w13, w26, #24 + 144: 93d87fd7 extr x23, x30, x24, #31 + 148: 54000000 b.eq 148 + 14c: 54fff5a0 b.eq 0 + 150: 54002aa0 b.eq 6a4 + 154: 54000001 b.ne 154 + 158: 54fff541 b.ne 0 + 15c: 54002a41 b.ne 6a4 + 160: 54000002 b.cs 160 + 164: 54fff4e2 b.cs 0 + 168: 540029e2 b.cs 6a4 + 16c: 54000002 b.cs 16c + 170: 54fff482 b.cs 0 + 174: 54002982 b.cs 6a4 + 178: 54000003 b.cc 178 + 17c: 54fff423 b.cc 0 + 180: 54002923 b.cc 6a4 + 184: 54000003 b.cc 184 + 188: 54fff3c3 b.cc 0 + 18c: 540028c3 b.cc 6a4 + 190: 54000004 b.mi 190 + 194: 54fff364 b.mi 0 + 198: 54002864 b.mi 6a4 + 19c: 54000005 b.pl 19c + 1a0: 54fff305 b.pl 0 + 1a4: 54002805 b.pl 6a4 + 1a8: 54000006 b.vs 1a8 + 1ac: 54fff2a6 b.vs 0 + 1b0: 540027a6 b.vs 6a4 + 1b4: 54000007 b.vc 1b4 + 1b8: 54fff247 b.vc 0 + 1bc: 54002747 b.vc 6a4 + 1c0: 54000008 b.hi 1c0 + 1c4: 54fff1e8 b.hi 0 + 1c8: 540026e8 b.hi 6a4 + 1cc: 54000009 b.ls 1cc + 1d0: 54fff189 b.ls 0 + 1d4: 54002689 b.ls 6a4 + 1d8: 5400000a b.ge 1d8 + 1dc: 54fff12a b.ge 0 + 1e0: 5400262a b.ge 6a4 + 1e4: 5400000b b.lt 1e4 + 1e8: 54fff0cb b.lt 0 + 1ec: 540025cb b.lt 6a4 + 1f0: 5400000c b.gt 1f0 + 1f4: 54fff06c b.gt 0 + 1f8: 5400256c b.gt 6a4 + 1fc: 5400000d b.le 1fc + 200: 54fff00d b.le 0 + 204: 5400250d b.le 6a4 + 208: 5400000e b.al 208 + 20c: 54ffefae b.al 0 + 210: 540024ae b.al 6a4 + 214: 5400000f b.nv 214 + 218: 54ffef4f b.nv 0 + 21c: 5400244f b.nv 6a4 + 220: d4063721 svc #0x31b9 + 224: d4035082 hvc #0x1a84 + 228: d400bfe3 smc #0x5ff + 22c: d4282fc0 brk #0x417e + 230: d444c320 hlt #0x2619 + 234: d503201f nop + 238: d69f03e0 eret + 23c: d6bf03e0 drps + 240: d5033fdf isb + 244: d5033f9f dsb sy + 248: d5033abf dmb ishst + 24c: d61f0040 br x2 + 250: d63f00a0 blr x5 + 254: c8147c55 stxr w20, x21, [x2] + 258: c805fcfd stlxr w5, 
x29, [x7] + 25c: c85f7e05 ldxr x5, [x16] + 260: c85fffbb ldaxr x27, [x29] + 264: c89fffa0 stlr x0, [x29] + 268: c8dfff95 ldar x21, [x28] + 26c: 88157cf8 stxr w21, w24, [x7] + 270: 8815ff9a stlxr w21, w26, [x28] + 274: 885f7cd5 ldxr w21, [x6] + 278: 885fffcf ldaxr w15, [x30] + 27c: 889ffc73 stlr w19, [x3] + 280: 88dffc56 ldar w22, [x2] + 284: 48127c0f stxrh w18, w15, [x0] + 288: 480bff85 stlxrh w11, w5, [x28] + 28c: 485f7cdd ldxrh w29, [x6] + 290: 485ffcf2 ldaxrh w18, [x7] + 294: 489fff99 stlrh w25, [x28] + 298: 48dffe62 ldarh w2, [x19] + 29c: 080a7c3e stxrb w10, w30, [x1] + 2a0: 0814fed5 stlxrb w20, w21, [x22] + 2a4: 085f7c59 ldxrb w25, [x2] + 2a8: 085ffcb8 ldaxrb w24, [x5] + 2ac: 089ffc70 stlrb w16, [x3] + 2b0: 08dfffb6 ldarb w22, [x29] + 2b4: c87f0a68 ldxp x8, x2, [x19] + 2b8: c87fcdc7 ldaxp x7, x19, [x14] + 2bc: c82870bb stxp w8, x27, x28, [x5] + 2c0: c825b8c8 stlxp w5, x8, x14, [x6] + 2c4: 887f12d9 ldxp w25, w4, [x22] + 2c8: 887fb9ed ldaxp w13, w14, [x15] + 2cc: 8834215a stxp w20, w26, w8, [x10] + 2d0: 8837ca52 stlxp w23, w18, w18, [x18] + 2d4: f806317e str x30, [x11,#99] + 2d8: b81b3337 str w23, [x25,#-77] + 2dc: 39000dc2 strb w2, [x14,#3] + 2e0: 78005149 strh w9, [x10,#5] + 2e4: f84391f4 ldr x20, [x15,#57] + 2e8: b85b220c ldr w12, [x16,#-78] + 2ec: 385fd356 ldrb w22, [x26,#-3] + 2f0: 785d127e ldrh w30, [x19,#-47] + 2f4: 389f4149 ldrsb x9, [x10,#-12] + 2f8: 79801e3c ldrsh x28, [x17,#14] + 2fc: 79c014a3 ldrsh w3, [x5,#10] + 300: b89a5231 ldrsw x17, [x17,#-91] + 304: fc5ef282 ldr d2, [x20,#-17] + 308: bc5f60f6 ldr s22, [x7,#-10] + 30c: fc12125e str d30, [x18,#-223] + 310: bc0152cd str s13, [x22,#21] + 314: f8190e49 str x9, [x18,#-112]! + 318: b800befd str w29, [x23,#11]! + 31c: 381ffd92 strb w18, [x12,#-1]! + 320: 781e9e90 strh w16, [x20,#-23]! + 324: f8409fa3 ldr x3, [x29,#9]! + 328: b8413c79 ldr w25, [x3,#19]! + 32c: 385fffa1 ldrb w1, [x29,#-1]! + 330: 785c7fa8 ldrh w8, [x29,#-57]! + 334: 389f3dc5 ldrsb x5, [x14,#-13]! + 338: 78801f6a ldrsh x10, [x27,#1]! + 33c: 78c19d4b ldrsh w11, [x10,#25]! + 340: b89a4ec4 ldrsw x4, [x22,#-92]! + 344: fc408eeb ldr d11, [x23,#8]! + 348: bc436e79 ldr s25, [x19,#54]! + 34c: fc152ce1 str d1, [x7,#-174]! + 350: bc036f28 str s8, [x25,#54]! 
+ 354: f8025565 str x5, [x11],#37 + 358: b80135f8 str w24, [x15],#19 + 35c: 381ff74f strb w15, [x26],#-1 + 360: 781fa652 strh w18, [x18],#-6 + 364: f851a447 ldr x7, [x2],#-230 + 368: b85e557b ldr w27, [x11],#-27 + 36c: 385e7472 ldrb w18, [x3],#-25 + 370: 785e070a ldrh w10, [x24],#-32 + 374: 38804556 ldrsb x22, [x10],#4 + 378: 78819591 ldrsh x17, [x12],#25 + 37c: 78dc24e8 ldrsh w8, [x7],#-62 + 380: b89cd6d7 ldrsw x23, [x22],#-51 + 384: fc430738 ldr d24, [x25],#48 + 388: bc5f6595 ldr s21, [x12],#-10 + 38c: fc1225b2 str d18, [x13],#-222 + 390: bc1d7430 str s16, [x1],#-41 + 394: f82fcac2 str x2, [x22,w15,sxtw] + 398: b83d6a02 str w2, [x16,x29] + 39c: 382e5a54 strb w20, [x18,w14,uxtw #0] + 3a0: 7834fa66 strh w6, [x19,x20,sxtx #1] + 3a4: f86ecbae ldr x14, [x29,w14,sxtw] + 3a8: b86cda90 ldr w16, [x20,w12,sxtw #2] + 3ac: 3860d989 ldrb w9, [x12,w0,sxtw #0] + 3b0: 78637a2c ldrh w12, [x17,x3,lsl #1] + 3b4: 38a3fa22 ldrsb x2, [x17,x3,sxtx #0] + 3b8: 78b15827 ldrsh x7, [x1,w17,uxtw #1] + 3bc: 78f2d9f9 ldrsh w25, [x15,w18,sxtw #1] + 3c0: b8ac6ab7 ldrsw x23, [x21,x12] + 3c4: fc6879a5 ldr d5, [x13,x8,lsl #3] + 3c8: bc767943 ldr s3, [x10,x22,lsl #2] + 3cc: fc3bc84e str d14, [x2,w27,sxtw] + 3d0: bc3968d4 str s20, [x6,x25] + 3d4: f91fc0fe str x30, [x7,#16256] + 3d8: b91da50f str w15, [x8,#7588] + 3dc: 391d280b strb w11, [x0,#1866] + 3e0: 791d2e23 strh w3, [x17,#3734] + 3e4: f95bc8e2 ldr x2, [x7,#14224] + 3e8: b95ce525 ldr w5, [x9,#7396] + 3ec: 395ae53c ldrb w28, [x9,#1721] + 3f0: 795c9282 ldrh w2, [x20,#3656] + 3f4: 399d7dd6 ldrsb x22, [x14,#1887] + 3f8: 799fe008 ldrsh x8, [x0,#4080] + 3fc: 79de9bc0 ldrsh w0, [x30,#3916] + 400: b99aae78 ldrsw x24, [x19,#6828] + 404: fd597598 ldr d24, [x12,#13032] + 408: bd5d1d08 ldr s8, [x8,#7452] + 40c: fd1f3dea str d10, [x15,#15992] + 410: bd1a227a str s26, [x19,#6688] + 414: 5800148a ldr x10, 6a4 + 418: 18000003 ldr w3, 418 + 41c: f88092e0 prfm pldl1keep, [x23,#9] + 420: d8ffdf00 prfm pldl1keep, 0 + 424: f8a84860 prfm pldl1keep, [x3,w8,uxtw] + 428: f99d7560 prfm pldl1keep, [x11,#15080] + 42c: 1a1c012d adc w13, w9, w28 + 430: 3a1c027b adcs w27, w19, w28 + 434: 5a060253 sbc w19, w18, w6 + 438: 7a03028e sbcs w14, w20, w3 + 43c: 9a0801d0 adc x16, x14, x8 + 440: ba0803a0 adcs x0, x29, x8 + 444: da140308 sbc x8, x24, x20 + 448: fa00038c sbcs x12, x28, x0 + 44c: 0b3010d7 add w23, w6, w16, uxtb #4 + 450: 2b37ab39 adds w25, w25, w23, sxth #2 + 454: cb2466da sub x26, x22, x4, uxtx #1 + 458: 6b33efb1 subs w17, w29, w19, sxtx #3 + 45c: 8b350fcb add x11, x30, w21, uxtb #3 + 460: ab208a70 adds x16, x19, w0, sxtb #2 + 464: cb39e52b sub x11, x9, x25, sxtx #1 + 468: eb2c9291 subs x17, x20, w12, sxtb #4 + 46c: 3a4bd1a3 ccmn w13, w11, #0x3, le + 470: 7a4c81a2 ccmp w13, w12, #0x2, hi + 474: ba42106c ccmn x3, x2, #0xc, ne + 478: fa5560e3 ccmp x7, x21, #0x3, vs + 47c: 3a4e3844 ccmn w2, #0xe, #0x4, cc + 480: 7a515a26 ccmp w17, #0x11, #0x6, pl + 484: ba4c2940 ccmn x10, #0xc, #0x0, cs + 488: fa52aaae ccmp x21, #0x12, #0xe, ge + 48c: 1a8cc1b5 csel w21, w13, w12, gt + 490: 1a8f976a csinc w10, w27, w15, ls + 494: 5a8981a0 csinv w0, w13, w9, hi + 498: 5a9a6492 csneg w18, w4, w26, vs + 49c: 9a8793ac csel x12, x29, x7, ls + 4a0: 9a9474e6 csinc x6, x7, x20, vc + 4a4: da83d2b6 csinv x22, x21, x3, le + 4a8: da9b9593 csneg x19, x12, x27, ls + 4ac: 5ac00200 rbit w0, w16 + 4b0: 5ac006f1 rev16 w17, w23 + 4b4: 5ac009d1 rev w17, w14 + 4b8: 5ac013d8 clz w24, w30 + 4bc: 5ac016d8 cls w24, w22 + 4c0: dac00223 rbit x3, x17 + 4c4: dac005ac rev16 x12, x13 + 4c8: dac00ac9 rev32 x9, x22 + 4cc: dac00c00 rev x0, x0 + 4d0: 
dac01205 clz x5, x16 + 4d4: dac016d9 cls x25, x22 + 4d8: 1ac0089d udiv w29, w4, w0 + 4dc: 1add0fa0 sdiv w0, w29, w29 + 4e0: 1ad52225 lsl w5, w17, w21 + 4e4: 1ad22529 lsr w9, w9, w18 + 4e8: 1ac82b61 asr w1, w27, w8 + 4ec: 1acd2e92 ror w18, w20, w13 + 4f0: 9acc0b28 udiv x8, x25, x12 + 4f4: 9adc0ca7 sdiv x7, x5, x28 + 4f8: 9adb2225 lsl x5, x17, x27 + 4fc: 9ad42757 lsr x23, x26, x20 + 500: 9adc291c asr x28, x8, x28 + 504: 9ac42fa3 ror x3, x29, x4 + 508: 1b1a55d1 madd w17, w14, w26, w21 + 50c: 1b0bafc1 msub w1, w30, w11, w11 + 510: 9b067221 madd x1, x17, x6, x28 + 514: 9b1ea0de msub x30, x6, x30, x8 + 518: 9b2e20d5 smaddl x21, w6, w14, x8 + 51c: 9b38cd4a smsubl x10, w10, w24, x19 + 520: 9bae6254 umaddl x20, w18, w14, x24 + 524: 9ba59452 umsubl x18, w2, w5, x5 + 528: 1e2d0a48 fmul s8, s18, s13 + 52c: 1e3c19c2 fdiv s2, s14, s28 + 530: 1e3c298f fadd s15, s12, s28 + 534: 1e213980 fsub s0, s12, s1 + 538: 1e240baf fmul s15, s29, s4 + 53c: 1e77082c fmul d12, d1, d23 + 540: 1e72191b fdiv d27, d8, d18 + 544: 1e6b2a97 fadd d23, d20, d11 + 548: 1e723988 fsub d8, d12, d18 + 54c: 1e770b1a fmul d26, d24, d23 + 550: 1f0d66f5 fmadd s21, s23, s13, s25 + 554: 1f01b956 fmsub s22, s10, s1, s14 + 558: 1f227a8e fnmadd s14, s20, s2, s30 + 55c: 1f365ba7 fnmadd s7, s29, s22, s22 + 560: 1f4f14ad fmadd d13, d5, d15, d5 + 564: 1f45a98e fmsub d14, d12, d5, d10 + 568: 1f60066a fnmadd d10, d19, d0, d1 + 56c: 1f620054 fnmadd d20, d2, d2, d0 + 570: 1e204139 fmov s25, s9 + 574: 1e20c094 fabs s20, s4 + 578: 1e214363 fneg s3, s27 + 57c: 1e21c041 fsqrt s1, s2 + 580: 1e22c01e fcvt d30, s0 + 584: 1e60408c fmov d12, d4 + 588: 1e60c361 fabs d1, d27 + 58c: 1e6142c8 fneg d8, d22 + 590: 1e61c16b fsqrt d11, d11 + 594: 1e624396 fcvt s22, d28 + 598: 1e3802dc fcvtzs w28, s22 + 59c: 9e380374 fcvtzs x20, s27 + 5a0: 1e78000e fcvtzs w14, d0 + 5a4: 9e78017a fcvtzs x26, d11 + 5a8: 1e2202dc scvtf s28, w22 + 5ac: 9e220150 scvtf s16, x10 + 5b0: 1e6202a8 scvtf d8, w21 + 5b4: 9e620395 scvtf d21, x28 + 5b8: 1e260318 fmov w24, s24 + 5bc: 9e660268 fmov x8, d19 + 5c0: 1e270188 fmov s8, w12 + 5c4: 9e6700e6 fmov d6, x7 + 5c8: 1e3023c0 fcmp s30, s16 + 5cc: 1e6b2320 fcmp d25, d11 + 5d0: 1e202168 fcmp s11, #0.0 + 5d4: 1e602168 fcmp d11, #0.0 + 5d8: 2910323d stp w29, w12, [x17,#128] + 5dc: 297449d6 ldp w22, w18, [x14,#-96] + 5e0: 6948402b ldpsw x11, x16, [x1,#64] + 5e4: a9072f40 stp x0, x11, [x26,#112] + 5e8: a9410747 ldp x7, x1, [x26,#16] + 5ec: 29801f0a stp w10, w7, [x24,#0]! + 5f0: 29e07307 ldp w7, w28, [x24,#-256]! + 5f4: 69e272b9 ldpsw x25, x28, [x21,#-240]! + 5f8: a9bf49d4 stp x20, x18, [x14,#-16]! + 5fc: a9c529a8 ldp x8, x10, [x13,#80]! 
+ 600: 28b0605a stp w26, w24, [x2],#-128 + 604: 28e866a2 ldp w2, w25, [x21],#-192 + 608: 68ee0ab1 ldpsw x17, x2, [x21],#-144 + 60c: a886296c stp x12, x10, [x11],#96 + 610: a8fe1a38 ldp x24, x6, [x17],#-32 + 614: 282479c3 stnp w3, w30, [x14,#-224] + 618: 286e534f ldnp w15, w20, [x26,#-144] + 61c: a8386596 stnp x22, x25, [x12,#-128] + 620: a8755a3b ldnp x27, x22, [x17,#-176] + 624: 1e601000 fmov d0, #2.000000000000000000e+00 + 628: 1e603000 fmov d0, #2.125000000000000000e+00 + 62c: 1e621000 fmov d0, #4.000000000000000000e+00 + 630: 1e623000 fmov d0, #4.250000000000000000e+00 + 634: 1e641000 fmov d0, #8.000000000000000000e+00 + 638: 1e643000 fmov d0, #8.500000000000000000e+00 + 63c: 1e661000 fmov d0, #1.600000000000000000e+01 + 640: 1e663000 fmov d0, #1.700000000000000000e+01 + 644: 1e681000 fmov d0, #1.250000000000000000e-01 + 648: 1e683000 fmov d0, #1.328125000000000000e-01 + 64c: 1e6a1000 fmov d0, #2.500000000000000000e-01 + 650: 1e6a3000 fmov d0, #2.656250000000000000e-01 + 654: 1e6c1000 fmov d0, #5.000000000000000000e-01 + 658: 1e6c3000 fmov d0, #5.312500000000000000e-01 + 65c: 1e6e1000 fmov d0, #1.000000000000000000e+00 + 660: 1e6e3000 fmov d0, #1.062500000000000000e+00 + 664: 1e701000 fmov d0, #-2.000000000000000000e+00 + 668: 1e703000 fmov d0, #-2.125000000000000000e+00 + 66c: 1e721000 fmov d0, #-4.000000000000000000e+00 + 670: 1e723000 fmov d0, #-4.250000000000000000e+00 + 674: 1e741000 fmov d0, #-8.000000000000000000e+00 + 678: 1e743000 fmov d0, #-8.500000000000000000e+00 + 67c: 1e761000 fmov d0, #-1.600000000000000000e+01 + 680: 1e763000 fmov d0, #-1.700000000000000000e+01 + 684: 1e781000 fmov d0, #-1.250000000000000000e-01 + 688: 1e783000 fmov d0, #-1.328125000000000000e-01 + 68c: 1e7a1000 fmov d0, #-2.500000000000000000e-01 + 690: 1e7a3000 fmov d0, #-2.656250000000000000e-01 + 694: 1e7c1000 fmov d0, #-5.000000000000000000e-01 + 698: 1e7c3000 fmov d0, #-5.312500000000000000e-01 + 69c: 1e7e1000 fmov d0, #-1.000000000000000000e+00 + 6a0: 1e7e3000 fmov d0, #-1.062500000000000000e+00 + */ + + static const unsigned int insns[] = + { + 0x8b0772d3, 0xcb4a3570, 0xab9c09bb, 0xeb9aa794, + 0x0b934e68, 0x4b0a3924, 0x2b1e3568, 0x6b132720, + 0x8a154c14, 0xaa1445d5, 0xca01cf99, 0xea8b3f6a, + 0x0a8c5cb9, 0x2a4a11d2, 0x4a855aa4, 0x6a857415, + 0x8aa697da, 0xaa6d7423, 0xca29bf80, 0xea3cb8bd, + 0x0a675249, 0x2ab961ba, 0x4a331899, 0x6a646345, + 0x11055267, 0x31064408, 0x51028e9d, 0x710bdee8, + 0x91082d81, 0xb106a962, 0xd10b33ae, 0xf10918ab, + 0x121102d7, 0x3204cd44, 0x5204cf00, 0x72099fb3, + 0x92729545, 0xb20e37cc, 0xd27c34be, 0xf27e4efa, + 0x14000000, 0x17ffffd7, 0x1400017f, 0x94000000, + 0x97ffffd4, 0x9400017c, 0x3400000c, 0x34fffa2c, + 0x34002f2c, 0x35000014, 0x35fff9d4, 0x35002ed4, + 0xb400000c, 0xb4fff96c, 0xb4002e6c, 0xb5000018, + 0xb5fff918, 0xb5002e18, 0x10000006, 0x10fff8a6, + 0x10002da6, 0x90000015, 0x36080001, 0x360ff821, + 0x36082d21, 0x37480008, 0x374ff7c8, 0x37482cc8, + 0x128b50ec, 0x52a9ff8b, 0x7281d095, 0x92edfebd, + 0xd28361e3, 0xf2a4cc96, 0x9346590c, 0x33194f33, + 0x531d3d89, 0x9350433c, 0xb34464ac, 0xd3462140, + 0x139a61a4, 0x93d87fd7, 0x54000000, 0x54fff5a0, + 0x54002aa0, 0x54000001, 0x54fff541, 0x54002a41, + 0x54000002, 0x54fff4e2, 0x540029e2, 0x54000002, + 0x54fff482, 0x54002982, 0x54000003, 0x54fff423, + 0x54002923, 0x54000003, 0x54fff3c3, 0x540028c3, + 0x54000004, 0x54fff364, 0x54002864, 0x54000005, + 0x54fff305, 0x54002805, 0x54000006, 0x54fff2a6, + 0x540027a6, 0x54000007, 0x54fff247, 0x54002747, + 0x54000008, 0x54fff1e8, 0x540026e8, 0x54000009, + 0x54fff189, 0x54002689, 
0x5400000a, 0x54fff12a, + 0x5400262a, 0x5400000b, 0x54fff0cb, 0x540025cb, + 0x5400000c, 0x54fff06c, 0x5400256c, 0x5400000d, + 0x54fff00d, 0x5400250d, 0x5400000e, 0x54ffefae, + 0x540024ae, 0x5400000f, 0x54ffef4f, 0x5400244f, + 0xd4063721, 0xd4035082, 0xd400bfe3, 0xd4282fc0, + 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, + 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, + 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, + 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8, + 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, + 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, + 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, + 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, + 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, + 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a, + 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, + 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, + 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3, + 0xb89a5231, 0xfc5ef282, 0xbc5f60f6, 0xfc12125e, + 0xbc0152cd, 0xf8190e49, 0xb800befd, 0x381ffd92, + 0x781e9e90, 0xf8409fa3, 0xb8413c79, 0x385fffa1, + 0x785c7fa8, 0x389f3dc5, 0x78801f6a, 0x78c19d4b, + 0xb89a4ec4, 0xfc408eeb, 0xbc436e79, 0xfc152ce1, + 0xbc036f28, 0xf8025565, 0xb80135f8, 0x381ff74f, + 0x781fa652, 0xf851a447, 0xb85e557b, 0x385e7472, + 0x785e070a, 0x38804556, 0x78819591, 0x78dc24e8, + 0xb89cd6d7, 0xfc430738, 0xbc5f6595, 0xfc1225b2, + 0xbc1d7430, 0xf82fcac2, 0xb83d6a02, 0x382e5a54, + 0x7834fa66, 0xf86ecbae, 0xb86cda90, 0x3860d989, + 0x78637a2c, 0x38a3fa22, 0x78b15827, 0x78f2d9f9, + 0xb8ac6ab7, 0xfc6879a5, 0xbc767943, 0xfc3bc84e, + 0xbc3968d4, 0xf91fc0fe, 0xb91da50f, 0x391d280b, + 0x791d2e23, 0xf95bc8e2, 0xb95ce525, 0x395ae53c, + 0x795c9282, 0x399d7dd6, 0x799fe008, 0x79de9bc0, + 0xb99aae78, 0xfd597598, 0xbd5d1d08, 0xfd1f3dea, + 0xbd1a227a, 0x5800148a, 0x18000003, 0xf88092e0, + 0xd8ffdf00, 0xf8a84860, 0xf99d7560, 0x1a1c012d, + 0x3a1c027b, 0x5a060253, 0x7a03028e, 0x9a0801d0, + 0xba0803a0, 0xda140308, 0xfa00038c, 0x0b3010d7, + 0x2b37ab39, 0xcb2466da, 0x6b33efb1, 0x8b350fcb, + 0xab208a70, 0xcb39e52b, 0xeb2c9291, 0x3a4bd1a3, + 0x7a4c81a2, 0xba42106c, 0xfa5560e3, 0x3a4e3844, + 0x7a515a26, 0xba4c2940, 0xfa52aaae, 0x1a8cc1b5, + 0x1a8f976a, 0x5a8981a0, 0x5a9a6492, 0x9a8793ac, + 0x9a9474e6, 0xda83d2b6, 0xda9b9593, 0x5ac00200, + 0x5ac006f1, 0x5ac009d1, 0x5ac013d8, 0x5ac016d8, + 0xdac00223, 0xdac005ac, 0xdac00ac9, 0xdac00c00, + 0xdac01205, 0xdac016d9, 0x1ac0089d, 0x1add0fa0, + 0x1ad52225, 0x1ad22529, 0x1ac82b61, 0x1acd2e92, + 0x9acc0b28, 0x9adc0ca7, 0x9adb2225, 0x9ad42757, + 0x9adc291c, 0x9ac42fa3, 0x1b1a55d1, 0x1b0bafc1, + 0x9b067221, 0x9b1ea0de, 0x9b2e20d5, 0x9b38cd4a, + 0x9bae6254, 0x9ba59452, 0x1e2d0a48, 0x1e3c19c2, + 0x1e3c298f, 0x1e213980, 0x1e240baf, 0x1e77082c, + 0x1e72191b, 0x1e6b2a97, 0x1e723988, 0x1e770b1a, + 0x1f0d66f5, 0x1f01b956, 0x1f227a8e, 0x1f365ba7, + 0x1f4f14ad, 0x1f45a98e, 0x1f60066a, 0x1f620054, + 0x1e204139, 0x1e20c094, 0x1e214363, 0x1e21c041, + 0x1e22c01e, 0x1e60408c, 0x1e60c361, 0x1e6142c8, + 0x1e61c16b, 0x1e624396, 0x1e3802dc, 0x9e380374, + 0x1e78000e, 0x9e78017a, 0x1e2202dc, 0x9e220150, + 0x1e6202a8, 0x9e620395, 0x1e260318, 0x9e660268, + 0x1e270188, 0x9e6700e6, 0x1e3023c0, 0x1e6b2320, + 0x1e202168, 0x1e602168, 0x2910323d, 0x297449d6, + 0x6948402b, 0xa9072f40, 0xa9410747, 0x29801f0a, + 0x29e07307, 0x69e272b9, 0xa9bf49d4, 0xa9c529a8, + 0x28b0605a, 0x28e866a2, 0x68ee0ab1, 0xa886296c, + 0xa8fe1a38, 0x282479c3, 0x286e534f, 0xa8386596, + 0xa8755a3b, 0x1e601000, 0x1e603000, 0x1e621000, + 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000, + 0x1e663000, 0x1e681000, 
0x1e683000, 0x1e6a1000, + 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, + 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000, + 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, + 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, + 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, + 0x1e7e3000, + }; +// END Generated code -- do not edit + + { + bool ok = true; + unsigned int *insns1 = (unsigned int *)entry; + for (unsigned int i = 0; i < sizeof insns / sizeof insns[0]; i++) { + if (insns[i] != insns1[i]) { + ok = false; + printf("Ours:\n"); + Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]); + printf("Theirs:\n"); + Disassembler::decode((address)&insns[i], (address)&insns[i+1]); + printf("\n"); + } + } + assert(ok, "Assembler smoke test failed"); + } + +#ifndef PRODUCT + + address PC = __ pc(); + __ ld1(v0, __ T16B, Address(r16)); // No offset + __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index + __ ld1(v0, __ T16B, Address(r16, r17)); // + + +#endif // PRODUCT +} + +#endif // ASSERT + +#undef __ + +void Assembler::emit_data64(jlong data, + relocInfo::relocType rtype, + int format) { + if (rtype == relocInfo::none) { + emit_int64(data); + } else { + emit_data64(data, Relocation::spec_simple(rtype), format); + } +} + +void Assembler::emit_data64(jlong data, + RelocationHolder const& rspec, + int format) { + + assert(inst_mark() != NULL, "must be inside InstructionMark"); + // Do not use AbstractAssembler::relocate, which is not intended for + // embedded words. Instead, relocate to the enclosing instruction. + code_section()->relocate(inst_mark(), rspec, format); + emit_int64(data); +} + +extern "C" { + void das(uint64_t start, int len) { + ResourceMark rm; + len <<= 2; + if (len < 0) + Disassembler::decode((address)start + len, (address)start); + else + Disassembler::decode((address)start, (address)start + len); + } + + JNIEXPORT void das1(unsigned long insn) { + das(insn, 1); + } +} + +#define gas_assert(ARG1) assert(ARG1, #ARG1) + +#define __ as-> + +void Address::lea(MacroAssembler *as, Register r) const { + Relocation* reloc = _rspec.reloc(); + relocInfo::relocType rtype = (relocInfo::relocType) reloc->type(); + + switch(_mode) { + case base_plus_offset: { + if (_offset == 0 && _base == r) // it's a nop + break; + if (_offset > 0) + __ add(r, _base, _offset); + else + __ sub(r, _base, -_offset); + break; + } + case base_plus_offset_reg: { + __ add(r, _base, _index, _ext.op(), MAX(_ext.shift(), 0)); + break; + } + case literal: { + if (rtype == relocInfo::none) + __ mov(r, target()); + else + __ movptr(r, (uint64_t)target()); + break; + } + default: + ShouldNotReachHere(); + } +} + +void Assembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { + ShouldNotReachHere(); +} + +#undef __ + +#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use) + + void Assembler::adr(Register Rd, address adr) { + long offset = adr - pc(); + int offset_lo = offset & 3; + offset >>= 2; + starti; + f(0, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5); + rf(Rd, 0); + } + + void Assembler::_adrp(Register Rd, address adr) { + uint64_t pc_page = (uint64_t)pc() >> 12; + uint64_t adr_page = (uint64_t)adr >> 12; + long offset = adr_page - pc_page; + int offset_lo = offset & 3; + offset >>= 2; + starti; + f(1, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5); + rf(Rd, 0); + } + +#undef starti + +Address::Address(address target, relocInfo::relocType rtype) : _mode(literal){ + _is_lval = false; + _target = target; + switch (rtype) 
{ + case relocInfo::oop_type: + case relocInfo::metadata_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + _rspec = RelocationHolder::none; + break; + default: + ShouldNotReachHere(); + break; + } +} + +void Assembler::b(const Address &dest) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + b(dest.target()); +} + +void Assembler::bl(const Address &dest) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + bl(dest.target()); +} + +void Assembler::adr(Register r, const Address &dest) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + adr(r, dest.target()); +} + +void Assembler::br(Condition cc, Label &L) { + if (L.is_bound()) { + br(cc, target(L)); + } else { + L.add_patch_at(code(), locator()); + br(cc, pc()); + } +} + +void Assembler::wrap_label(Label &L, + Assembler::uncond_branch_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } +} + +void Assembler::wrap_label(Register r, Label &L, + compare_and_branch_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } +} + +void Assembler::wrap_label(Register r, int bitpos, Label &L, + test_and_branch_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, bitpos, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, bitpos, pc()); + } +} + +void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L), op); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc(), op); + } +} + + // An "all-purpose" add/subtract immediate, per ARM documentation: + // A "programmer-friendly" assembler may accept a negative immediate + // between -(2^24 -1) and -1 inclusive, causing it to convert a + // requested ADD operation to a SUB, or vice versa, and then encode + // the absolute value of the immediate as for uimm24. +void Assembler::add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op, + int negated_op) { + bool sets_flags = op & 1; // this op sets flags + union { + unsigned u; + int imm; + }; + u = uimm; + bool shift = false; + bool neg = imm < 0; + if (neg) { + imm = -imm; + op = negated_op; + } + assert(Rd != sp || imm % 16 == 0, "misaligned stack"); + if (imm >= (1 << 11) + && ((imm >> 12) << 12 == imm)) { + imm >>= 12; + shift = true; + } + f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); + + // add/subtract immediate ops with the S bit set treat r31 as zr; + // with S unset they use sp. 
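+  // (Illustrative note, not part of the original change: subs(zr, Rn, imm)
+  // is how a compare-with-immediate is formed, so the flag-setting forms
+  // encode Rd with zrf below, while the non-flag-setting forms use srf so
+  // that sp adjustments such as add(sp, sp, 16) remain expressible; a
+  // negative request like add(sp, sp, -16) has already been flipped to the
+  // matching sub above.)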
+ if (sets_flags) + zrf(Rd, 0); + else + srf(Rd, 0); + + srf(Rn, 5); +} + +bool Assembler::operand_valid_for_add_sub_immediate(long imm) { + bool shift = false; + unsigned long uimm = uabs(imm); + if (uimm < (1 << 12)) + return true; + if (uimm < (1 << 24) + && ((uimm >> 12) << 12 == uimm)) { + return true; + } + return false; +} + +bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) { + return encode_logical_immediate(is32, imm) != 0xffffffff; +} + +static uint64_t doubleTo64Bits(jdouble d) { + union { + jdouble double_value; + uint64_t double_bits; + }; + + double_value = d; + return double_bits; +} + +bool Assembler::operand_valid_for_float_immediate(double imm) { + // If imm is all zero bits we can use ZR as the source of a + // floating-point value. + if (doubleTo64Bits(imm) == 0) + return true; + + // Otherwise try to encode imm then convert the encoded value back + // and make sure it's the exact same bit pattern. + unsigned result = encoding_for_fp_immediate(imm); + return doubleTo64Bits(imm) == fp_immediate_for_encoding(result, true); +} + +int AbstractAssembler::code_fill_byte() { + return 0; +} + +// n.b. this is implemented in subclass MacroAssembler +void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } + + +// and now the routines called by the assembler which encapsulate the +// above encode and decode functions + +uint32_t +asm_util::encode_logical_immediate(bool is32, uint64_t imm) +{ + if (is32) { + /* Allow all zeros or all ones in top 32-bits, so that + constant expressions like ~1 are permitted. */ + if (imm >> 32 != 0 && imm >> 32 != 0xffffffff) + return 0xffffffff; + /* Replicate the 32 lower bits to the 32 upper bits. */ + imm &= 0xffffffff; + imm |= imm << 32; + } + + return encoding_for_logical_immediate(imm); +} + +unsigned Assembler::pack(double value) { + float val = (float)value; + unsigned result = encoding_for_fp_immediate(val); + guarantee(unpack(result) == value, + "Invalid floating-point immediate operand"); + return result; +} + +// Packed operands for Floating-point Move (immediate) + +static float unpack(unsigned value) { + union { + unsigned ival; + float val; + }; + ival = fp_immediate_for_encoding(value, 0); + return val; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/assembler_aarch64.hpp 2021-01-25 19:31:25.571359123 +0000 @@ -0,0 +1,2411 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2019, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+
+#include "asm/register.hpp"
+
+// definitions of various symbolic names for machine registers
+
+// First, intercalls between C and Java, which use 8 general registers
+// and 8 floating-point registers
+
+// we also have to copy between x86 and ARM registers but that's a
+// secondary complication -- not all code employing C call convention
+// executes as x86 code though -- we generate some of it
+
+class Argument VALUE_OBJ_CLASS_SPEC {
+ public:
+  enum {
+    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
+    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )
+
+    n_int_register_parameters_j   = 8,  // r1, ... r7, r0 (j_rarg0, j_rarg1, ...
+    n_float_register_parameters_j = 8   // v0, v1, ... v7 (j_farg0, j_farg1, ...
+  };
+};
+
+REGISTER_DECLARATION(Register, c_rarg0, r0);
+REGISTER_DECLARATION(Register, c_rarg1, r1);
+REGISTER_DECLARATION(Register, c_rarg2, r2);
+REGISTER_DECLARATION(Register, c_rarg3, r3);
+REGISTER_DECLARATION(Register, c_rarg4, r4);
+REGISTER_DECLARATION(Register, c_rarg5, r5);
+REGISTER_DECLARATION(Register, c_rarg6, r6);
+REGISTER_DECLARATION(Register, c_rarg7, r7);
+
+REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
+REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
+REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
+REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
+REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
+REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
+REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
+REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
+
+// Symbolically name the register arguments used by the Java calling convention.
+// We have control over the convention for java so we can do what we please.
+// What pleases us is to offset the java calling convention so that when
+// we call a suitable jni method the arguments are lined up and we don't
+// have to do much shuffling.
A suitable jni method is non-static and a +// small number of arguments +// +// |--------------------------------------------------------------------| +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | +// |--------------------------------------------------------------------| +// | r0 r1 r2 r3 r4 r5 r6 r7 | +// |--------------------------------------------------------------------| +// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | +// |--------------------------------------------------------------------| + + +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); +REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + +// Java floating args are passed as per C + +REGISTER_DECLARATION(FloatRegister, j_farg0, v0); +REGISTER_DECLARATION(FloatRegister, j_farg1, v1); +REGISTER_DECLARATION(FloatRegister, j_farg2, v2); +REGISTER_DECLARATION(FloatRegister, j_farg3, v3); +REGISTER_DECLARATION(FloatRegister, j_farg4, v4); +REGISTER_DECLARATION(FloatRegister, j_farg5, v5); +REGISTER_DECLARATION(FloatRegister, j_farg6, v6); +REGISTER_DECLARATION(FloatRegister, j_farg7, v7); + +// registers used to hold VM data either temporarily within a method +// or across method calls + +// volatile (caller-save) registers + +// r8 is used for indirect result location return +// we use it and r9 as scratch registers +REGISTER_DECLARATION(Register, rscratch1, r8); +REGISTER_DECLARATION(Register, rscratch2, r9); + +// current method -- must be in a call-clobbered register +REGISTER_DECLARATION(Register, rmethod, r12); + +// non-volatile (callee-save) registers are r16-29 +// of which the following are dedicated global state + +// link register +REGISTER_DECLARATION(Register, lr, r30); +// frame pointer +REGISTER_DECLARATION(Register, rfp, r29); +// current thread +REGISTER_DECLARATION(Register, rthread, r28); +// base of heap +REGISTER_DECLARATION(Register, rheapbase, r27); +// constant pool cache +REGISTER_DECLARATION(Register, rcpool, r26); +// monitors allocated on stack +REGISTER_DECLARATION(Register, rmonitors, r25); +// locals on stack +REGISTER_DECLARATION(Register, rlocals, r24); +// bytecode pointer +REGISTER_DECLARATION(Register, rbcp, r22); +// Dispatch table base +REGISTER_DECLARATION(Register, rdispatch, r21); +// Java stack pointer +REGISTER_DECLARATION(Register, esp, r20); + +// TODO : x86 uses rbp to save SP in method handle code +// we may need to do the same with fp +// JSR 292 fixed register usages: +//REGISTER_DECLARATION(Register, r_mh_SP_save, r29); + +#define assert_cond(ARG1) assert(ARG1, #ARG1) + +namespace asm_util { + uint32_t encode_logical_immediate(bool is32, uint64_t imm); +}; + +using namespace asm_util; + + +class Assembler; + +class Instruction_aarch64 { + unsigned insn; +#ifdef ASSERT + unsigned bits; +#endif + Assembler *assem; + +public: + + Instruction_aarch64(class Assembler *as) { +#ifdef ASSERT + bits = 0; +#endif + insn = 0; + assem = as; + } + + inline ~Instruction_aarch64(); + + unsigned &get_insn() { return insn; } +#ifdef ASSERT + unsigned &get_bits() { return bits; } +#endif + + static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) { + union { + unsigned u; + int n; + }; + + u = val << (31 - hi); + n = n >> (31 - hi + lo); 
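+    // Comment added for clarity (not in the original patch): the left shift
+    // places bit `hi' of val at bit 31 and the arithmetic right shift then
+    // propagates it back down, so n ends up holding bits [hi:lo] of val
+    // sign-extended to 32 bits.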
+ return n; + } + + static inline uint32_t extract(uint32_t val, int msb, int lsb) { + int nbits = msb - lsb + 1; + assert_cond(msb >= lsb); + uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; + } + + static inline int32_t sextract(uint32_t val, int msb, int lsb) { + uint32_t uval = extract(val, msb, lsb); + return extend(uval, msb - lsb); + } + + static void patch(address a, int msb, int lsb, unsigned long val) { + int nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + assert_cond(msb >= lsb); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= val; + *(unsigned *)a = target; + } + + static void spatch(address a, int msb, int lsb, long val) { + int nbits = msb - lsb + 1; + long chk = val >> (nbits - 1); + guarantee (chk == -1 || chk == 0, "Field too big for insn"); + unsigned uval = val; + unsigned mask = (1U << nbits) - 1; + uval &= mask; + uval <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= uval; + *(unsigned *)a = target; + } + + void f(unsigned val, int msb, int lsb) { + int nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + assert_cond(msb >= lsb); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + insn |= val; + assert_cond((bits & mask) == 0); +#ifdef ASSERT + bits |= mask; +#endif + } + + void f(unsigned val, int bit) { + f(val, bit, bit); + } + + void sf(long val, int msb, int lsb) { + int nbits = msb - lsb + 1; + long chk = val >> (nbits - 1); + guarantee (chk == -1 || chk == 0, "Field too big for insn"); + unsigned uval = val; + unsigned mask = (1U << nbits) - 1; + uval &= mask; + f(uval, lsb + nbits - 1, lsb); + } + + void rf(Register r, int lsb) { + f(r->encoding_nocheck(), lsb + 4, lsb); + } + + // reg|ZR + void zrf(Register r, int lsb) { + f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb); + } + + // reg|SP + void srf(Register r, int lsb) { + f(r == sp ? 
31 : r->encoding_nocheck(), lsb + 4, lsb); + } + + void rf(FloatRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 4, lsb); + } + + unsigned get(int msb = 31, int lsb = 0) { + int nbits = msb - lsb + 1; + unsigned mask = ((1U << nbits) - 1) << lsb; + assert_cond((bits & mask) == mask); + return (insn & mask) >> lsb; + } + + void fixed(unsigned value, unsigned mask) { + assert_cond ((mask & bits) == 0); +#ifdef ASSERT + bits |= mask; +#endif + insn |= value; + } +}; + +#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use) + +class PrePost { + int _offset; + Register _r; +public: + PrePost(Register reg, int o) : _r(reg), _offset(o) { } + int offset() { return _offset; } + Register reg() { return _r; } +}; + +class Pre : public PrePost { +public: + Pre(Register reg, int o) : PrePost(reg, o) { } +}; +class Post : public PrePost { +public: + Post(Register reg, int o) : PrePost(reg, o) { } +}; + +namespace ext +{ + enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx }; +}; + +// Addressing modes +class Address VALUE_OBJ_CLASS_SPEC { + public: + + enum mode { no_mode, base_plus_offset, pre, post, pcrel, + base_plus_offset_reg, literal }; + + // Shift and extend for base reg + reg offset addressing + class extend { + int _option, _shift; + ext::operation _op; + public: + extend() { } + extend(int s, int o, ext::operation op) : _shift(s), _option(o), _op(op) { } + int option() const{ return _option; } + int shift() const { return _shift; } + ext::operation op() const { return _op; } + }; + class uxtw : public extend { + public: + uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { } + }; + class lsl : public extend { + public: + lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { } + }; + class sxtw : public extend { + public: + sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { } + }; + class sxtx : public extend { + public: + sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { } + }; + + private: + Register _base; + Register _index; + long _offset; + enum mode _mode; + extend _ext; + + RelocationHolder _rspec; + + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of + // the item. We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to a + // register to reach it. Otherwise if near we can do PC-relative + // addressing. 
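+  // (Illustration only, not part of the original change: the RuntimeAddress
+  // and ExternalAddress wrappers declared later in this file produce such
+  // literal-mode Addresses; Address::lea() then materializes _target into a
+  // register with mov/movptr when the PC-relative forms cannot reach it.)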
+ address _target; + + public: + Address() + : _mode(no_mode) { } + Address(Register r) + : _mode(base_plus_offset), _base(r), _offset(0), _index(noreg), _target(0) { } + Address(Register r, int o) + : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } + Address(Register r, long o) + : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } + Address(Register r, unsigned long o) + : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { } +#ifdef ASSERT + Address(Register r, ByteSize disp) + : _mode(base_plus_offset), _base(r), _offset(in_bytes(disp)), + _index(noreg), _target(0) { } +#endif + Address(Register r, Register r1, extend ext = lsl()) + : _mode(base_plus_offset_reg), _base(r), _index(r1), + _ext(ext), _offset(0), _target(0) { } + Address(Pre p) + : _mode(pre), _base(p.reg()), _offset(p.offset()) { } + Address(Post p) + : _mode(post), _base(p.reg()), _offset(p.offset()), _target(0) { } + Address(address target, RelocationHolder const& rspec) + : _mode(literal), + _rspec(rspec), + _is_lval(false), + _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + Address(Register base, RegisterOrConstant index, extend ext = lsl()) + : _base (base), + _ext(ext), _offset(0), _target(0) { + if (index.is_register()) { + _mode = base_plus_offset_reg; + _index = index.as_register(); + } else { + guarantee(ext.option() == ext::uxtx, "should be"); + assert(index.is_constant(), "should be"); + _mode = base_plus_offset; + _offset = index.as_constant() << ext.shift(); + } + } + + Register base() const { + guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg + | _mode == post), + "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } + Register index() const { + return _index; + } + mode getMode() const { + return _mode; + } + bool uses(Register reg) const { return _base == reg || _index == reg; } + address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + + void encode(Instruction_aarch64 *i) const { + i->f(0b111, 29, 27); + i->srf(_base, 5); + + switch(_mode) { + case base_plus_offset: + { + unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } + unsigned mask = (1 << size) - 1; + if (_offset < 0 || _offset & mask) + { + i->f(0b00, 25, 24); + i->f(0, 21), i->f(0b00, 11, 10); + i->sf(_offset, 20, 12); + } else { + i->f(0b01, 25, 24); + i->f(_offset >> size, 21, 10); + } + } + break; + + case base_plus_offset_reg: + { + i->f(0b00, 25, 24); + i->f(1, 21); + i->rf(_index, 16); + i->f(_ext.option(), 15, 13); + unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } + if (size == 0) // It's a byte + i->f(_ext.shift() >= 0, 12); + else { + if (_ext.shift() > 0) + assert(_ext.shift() == (int)size, "bad shift"); + i->f(_ext.shift() > 0, 12); + } + i->f(0b10, 11, 10); + } + break; + + case pre: + i->f(0b00, 25, 24); + i->f(0, 21), i->f(0b11, 11, 10); + i->sf(_offset, 20, 12); + break; + + case post: + i->f(0b00, 25, 24); + i->f(0, 21), i->f(0b01, 11, 10); + i->sf(_offset, 20, 12); + break; + + default: + ShouldNotReachHere(); + } + } + + void encode_pair(Instruction_aarch64 *i) const { + switch(_mode) { + case base_plus_offset: + i->f(0b010, 25, 23); + break; + case pre: + i->f(0b011, 25, 23); + 
      break;
+    case post:
+      i->f(0b001, 25, 23);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+
+    unsigned size; // Operand shift in 32-bit words
+
+    if (i->get(26, 26)) { // float
+      switch(i->get(31, 30)) {
+      case 0b10:
+        size = 2; break;
+      case 0b01:
+        size = 1; break;
+      case 0b00:
+        size = 0; break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      size = i->get(31, 31);
+    }
+
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+  }
+
+  void encode_nontemporal_pair(Instruction_aarch64 *i) const {
+    // Only base + offset is allowed
+    i->f(0b000, 25, 23);
+    unsigned size = i->get(31, 31);
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+    guarantee(_mode == Address::base_plus_offset,
+              "Bad addressing mode for non-temporal op");
+  }
+
+  void lea(MacroAssembler *, Register) const;
+
+  static bool offset_ok_for_immed(long offset, int shift = 0) {
+    unsigned mask = (1 << shift) - 1;
+    if (offset < 0 || offset & mask) {
+      return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset
+    } else {
+      return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset
+    }
+  }
+};
+
+// Convenience classes
+class RuntimeAddress: public Address {
+
+  public:
+
+  RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
+
+};
+
+class OopAddress: public Address {
+
+  public:
+
+  OopAddress(address target) : Address(target, relocInfo::oop_type){}
+
+};
+
+class ExternalAddress: public Address {
+ private:
+  static relocInfo::relocType reloc_for_target(address target) {
+    // Sometimes ExternalAddress is used for values which aren't
+    // exactly addresses, like the card table base.
+    // external_word_type can't be used for values in the first page
+    // so just skip the reloc in that case.
+    return external_word_Relocation::can_be_relocated(target) ?
relocInfo::external_word_type : relocInfo::none; + } + + public: + + ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} + +}; + +class InternalAddress: public Address { + + public: + + InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} +}; + +const int FPUStateSizeInWords = 32 * 2; +typedef enum { + PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM, + PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM, + PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM +} prfop; + +class Assembler : public AbstractAssembler { + +#ifndef PRODUCT + static const unsigned long asm_bp; + + void emit_long(jint x) { + if ((unsigned long)pc() == asm_bp) + asm volatile ("nop"); + AbstractAssembler::emit_int32(x); + } +#else + void emit_long(jint x) { + AbstractAssembler::emit_int32(x); + } +#endif + +public: + + enum { instruction_size = 4 }; + + Address adjust(Register base, int offset, bool preIncrement) { + if (preIncrement) + return Address(Pre(base, offset)); + else + return Address(Post(base, offset)); + } + + Address pre(Register base, int offset) { + return adjust(base, offset, true); + } + + Address post (Register base, int offset) { + return adjust(base, offset, false); + } + + Instruction_aarch64* current; + + void set_current(Instruction_aarch64* i) { current = i; } + + void f(unsigned val, int msb, int lsb) { + current->f(val, msb, lsb); + } + void f(unsigned val, int msb) { + current->f(val, msb, msb); + } + void sf(long val, int msb, int lsb) { + current->sf(val, msb, lsb); + } + void rf(Register reg, int lsb) { + current->rf(reg, lsb); + } + void srf(Register reg, int lsb) { + current->srf(reg, lsb); + } + void zrf(Register reg, int lsb) { + current->zrf(reg, lsb); + } + void rf(FloatRegister reg, int lsb) { + current->rf(reg, lsb); + } + void fixed(unsigned value, unsigned mask) { + current->fixed(value, mask); + } + + void emit() { + emit_long(current->get_insn()); + assert_cond(current->get_bits() == 0xffffffff); + current = NULL; + } + + typedef void (Assembler::* uncond_branch_insn)(address dest); + typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest); + typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest); + typedef void (Assembler::* prefetch_insn)(address target, prfop); + + void wrap_label(Label &L, uncond_branch_insn insn); + void wrap_label(Register r, Label &L, compare_and_branch_insn insn); + void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn); + void wrap_label(Label &L, prfop, prefetch_insn insn); + + // PC-rel. 
addressing + + void adr(Register Rd, address dest); + void _adrp(Register Rd, address dest); + + void adr(Register Rd, const Address &dest); + void _adrp(Register Rd, const Address &dest); + + void adr(Register Rd, Label &L) { + wrap_label(Rd, L, &Assembler::Assembler::adr); + } + void _adrp(Register Rd, Label &L) { + wrap_label(Rd, L, &Assembler::_adrp); + } + + void adrp(Register Rd, const Address &dest, unsigned long &offset); + +#undef INSN + + void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op, + int negated_op); + + // Add/subtract (immediate) +#define INSN(NAME, decode, negated) \ + void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) { \ + starti; \ + f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \ + zrf(Rd, 0), srf(Rn, 5); \ + } \ + \ + void NAME(Register Rd, Register Rn, unsigned imm) { \ + starti; \ + add_sub_immediate(Rd, Rn, imm, decode, negated); \ + } + + INSN(addsw, 0b001, 0b011); + INSN(subsw, 0b011, 0b001); + INSN(adds, 0b101, 0b111); + INSN(subs, 0b111, 0b101); + +#undef INSN + +#define INSN(NAME, decode, negated) \ + void NAME(Register Rd, Register Rn, unsigned imm) { \ + starti; \ + add_sub_immediate(Rd, Rn, imm, decode, negated); \ + } + + INSN(addw, 0b000, 0b010); + INSN(subw, 0b010, 0b000); + INSN(add, 0b100, 0b110); + INSN(sub, 0b110, 0b100); + +#undef INSN + + // Logical (immediate) +#define INSN(NAME, decode, is32) \ + void NAME(Register Rd, Register Rn, uint64_t imm) { \ + starti; \ + uint32_t val = encode_logical_immediate(is32, imm); \ + f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ + srf(Rd, 0), zrf(Rn, 5); \ + } + + INSN(andw, 0b000, true); + INSN(orrw, 0b001, true); + INSN(eorw, 0b010, true); + INSN(andr, 0b100, false); + INSN(orr, 0b101, false); + INSN(eor, 0b110, false); + +#undef INSN + +#define INSN(NAME, decode, is32) \ + void NAME(Register Rd, Register Rn, uint64_t imm) { \ + starti; \ + uint32_t val = encode_logical_immediate(is32, imm); \ + f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ + zrf(Rd, 0), zrf(Rn, 5); \ + } + + INSN(ands, 0b111, false); + INSN(andsw, 0b011, true); + +#undef INSN + + // Move wide (immediate) +#define INSN(NAME, opcode) \ + void NAME(Register Rd, unsigned imm, unsigned shift = 0) { \ + assert_cond((shift/16)*16 == shift); \ + starti; \ + f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21), \ + f(imm, 20, 5); \ + rf(Rd, 0); \ + } + + INSN(movnw, 0b000); + INSN(movzw, 0b010); + INSN(movkw, 0b011); + INSN(movn, 0b100); + INSN(movz, 0b110); + INSN(movk, 0b111); + +#undef INSN + + // Bitfield +#define INSN(NAME, opcode) \ + void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) { \ + starti; \ + f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10); \ + rf(Rn, 5), rf(Rd, 0); \ + } + + INSN(sbfmw, 0b0001001100); + INSN(bfmw, 0b0011001100); + INSN(ubfmw, 0b0101001100); + INSN(sbfm, 0b1001001101); + INSN(bfm, 0b1011001101); + INSN(ubfm, 0b1101001101); + +#undef INSN + + // Extract +#define INSN(NAME, opcode) \ + void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) { \ + starti; \ + f(opcode, 31, 21), f(imms, 15, 10); \ + rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ + } + + INSN(extrw, 0b00010011100); + INSN(extr, 0b10010011110); + +#undef INSN + + // The maximum range of a branch is fixed for the AArch64 + // architecture. In debug mode we shrink it in order to test + // trampolines, but not so small that branches in the interpreter + // are out of range. 
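+  // (Worked example, not in the original patch: b/bl carry a signed 26-bit
+  // word offset, i.e. +/- 2^25 words = +/- 128M bytes, which is where the
+  // product value below comes from; the 2M debug value still keeps ordinary
+  // interpreter branches in range while forcing trampolines to be exercised.)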
+ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + + // Unconditional branch (immediate) + +#define INSN(NAME, opcode) \ + void NAME(address dest) { \ + starti; \ + long offset = (dest - pc()) >> 2; \ + DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \ + f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } \ + void NAME(const Address &dest); + + INSN(b, 0); + INSN(bl, 1); + +#undef INSN + + // Compare & branch (immediate) +#define INSN(NAME, opcode) \ + void NAME(Register Rt, address dest) { \ + long offset = (dest - pc()) >> 2; \ + starti; \ + f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0); \ + } \ + void NAME(Register Rt, Label &L) { \ + wrap_label(Rt, L, &Assembler::NAME); \ + } + + INSN(cbzw, 0b00110100); + INSN(cbnzw, 0b00110101); + INSN(cbz, 0b10110100); + INSN(cbnz, 0b10110101); + +#undef INSN + + // Test & branch (immediate) +#define INSN(NAME, opcode) \ + void NAME(Register Rt, int bitpos, address dest) { \ + long offset = (dest - pc()) >> 2; \ + int b5 = bitpos >> 5; \ + bitpos &= 0x1f; \ + starti; \ + f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \ + rf(Rt, 0); \ + } \ + void NAME(Register Rt, int bitpos, Label &L) { \ + wrap_label(Rt, bitpos, L, &Assembler::NAME); \ + } + + INSN(tbz, 0b0110110); + INSN(tbnz, 0b0110111); + +#undef INSN + + // Conditional branch (immediate) + enum Condition + {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV}; + + void br(Condition cond, address dest) { + long offset = (dest - pc()) >> 2; + starti; + f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0); + } + +#define INSN(NAME, cond) \ + void NAME(address dest) { \ + br(cond, dest); \ + } + + INSN(beq, EQ); + INSN(bne, NE); + INSN(bhs, HS); + INSN(bcs, CS); + INSN(blo, LO); + INSN(bcc, CC); + INSN(bmi, MI); + INSN(bpl, PL); + INSN(bvs, VS); + INSN(bvc, VC); + INSN(bhi, HI); + INSN(bls, LS); + INSN(bge, GE); + INSN(blt, LT); + INSN(bgt, GT); + INSN(ble, LE); + INSN(bal, AL); + INSN(bnv, NV); + + void br(Condition cc, Label &L); + +#undef INSN + + // Exception generation + void generate_exception(int opc, int op2, int LL, unsigned imm) { + starti; + f(0b11010100, 31, 24); + f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0); + } + +#define INSN(NAME, opc, op2, LL) \ + void NAME(unsigned imm) { \ + generate_exception(opc, op2, LL, imm); \ + } + + INSN(svc, 0b000, 0, 0b01); + INSN(hvc, 0b000, 0, 0b10); + INSN(smc, 0b000, 0, 0b11); + INSN(brk, 0b001, 0, 0b00); + INSN(hlt, 0b010, 0, 0b00); + INSN(dpcs1, 0b101, 0, 0b01); + INSN(dpcs2, 0b101, 0, 0b10); + INSN(dpcs3, 0b101, 0, 0b11); + +#undef INSN + + // System + void system(int op0, int op1, int CRn, int CRm, int op2, + Register rt = dummy_reg) + { + starti; + f(0b11010101000, 31, 21); + f(op0, 20, 19); + f(op1, 18, 16); + f(CRn, 15, 12); + f(CRm, 11, 8); + f(op2, 7, 5); + rf(rt, 0); + } + + void hint(int imm) { + system(0b00, 0b011, 0b0010, imm, 0b000); + } + + void nop() { + hint(0); + } + // we only provide mrs and msr for the special purpose system + // registers where op1 (instr[20:19]) == 11 and, (currently) only + // use it for FPSR n.b msr has L (instr[21]) == 0 mrs has L == 1 + + void msr(int op1, int CRn, int CRm, int op2, Register rt) { + starti; + f(0b1101010100011, 31, 19); + f(op1, 18, 16); + f(CRn, 15, 12); 
+ f(CRm, 11, 8); + f(op2, 7, 5); + // writing zr is ok + zrf(rt, 0); + } + + void mrs(int op1, int CRn, int CRm, int op2, Register rt) { + starti; + f(0b1101010100111, 31, 19); + f(op1, 18, 16); + f(CRn, 15, 12); + f(CRm, 11, 8); + f(op2, 7, 5); + // reading to zr is a mistake + rf(rt, 0); + } + + enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH, + ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY}; + + void dsb(barrier imm) { + system(0b00, 0b011, 0b00011, imm, 0b100); + } + + void dmb(barrier imm) { + system(0b00, 0b011, 0b00011, imm, 0b101); + } + + void isb() { + system(0b00, 0b011, 0b00011, SY, 0b110); + } + + void sys(int op1, int CRn, int CRm, int op2, + Register rt = (Register)0b11111) { + system(0b01, op1, CRn, CRm, op2, rt); + } + + // Only implement operations accessible from EL0 or higher, i.e., + // op1 CRn CRm op2 + // IC IVAU 3 7 5 1 + // DC CVAC 3 7 10 1 + // DC CVAU 3 7 11 1 + // DC CIVAC 3 7 14 1 + // DC ZVA 3 7 4 1 + // So only deal with the CRm field. + enum icache_maintenance {IVAU = 0b0101}; + enum dcache_maintenance {CVAC = 0b1010, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100}; + + void dc(dcache_maintenance cm, Register Rt) { + sys(0b011, 0b0111, cm, 0b001, Rt); + } + + void ic(icache_maintenance cm, Register Rt) { + sys(0b011, 0b0111, cm, 0b001, Rt); + } + + // A more convenient access to dmb for our purposes + enum Membar_mask_bits { + // We can use ISH for a barrier because the ARM ARM says "This + // architecture assumes that all Processing Elements that use the + // same operating system or hypervisor are in the same Inner + // Shareable shareability domain." + StoreStore = ISHST, + LoadStore = ISHLD, + LoadLoad = ISHLD, + StoreLoad = ISH, + AnyAny = ISH + }; + + void membar(Membar_mask_bits order_constraint) { + dmb(Assembler::barrier(order_constraint)); + } + + // Unconditional branch (register) + void branch_reg(Register R, int opc) { + starti; + f(0b1101011, 31, 25); + f(opc, 24, 21); + f(0b11111000000, 20, 10); + rf(R, 5); + f(0b00000, 4, 0); + } + +#define INSN(NAME, opc) \ + void NAME(Register R) { \ + branch_reg(R, opc); \ + } + + INSN(br, 0b0000); + INSN(blr, 0b0001); + INSN(ret, 0b0010); + + void ret(void *p); // This forces a compile-time error for ret(0) + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME() { \ + branch_reg(dummy_reg, opc); \ + } + + INSN(eret, 0b0100); + INSN(drps, 0b0101); + +#undef INSN + + // Load/store exclusive + enum operand_size { byte, halfword, word, xword }; + + void load_store_exclusive(Register Rs, Register Rt1, Register Rt2, + Register Rn, enum operand_size sz, int op, bool ordered) { + starti; + f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21); + rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0); + } + + void load_exclusive(Register dst, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(dummy_reg, dst, dummy_reg, addr, + sz, 0b010, ordered); + } + + void store_exclusive(Register status, Register new_val, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(status, new_val, dummy_reg, addr, + sz, 0b000, ordered); + } + +#define INSN4(NAME, sz, op, o0) /* Four registers */ \ + void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \ + load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ + } + +#define INSN3(NAME, sz, op, o0) /* Three registers */ \ + void NAME(Register Rs, Register Rt, Register Rn) { \ + guarantee(Rs != Rn && Rs != Rt, 
"unpredictable instruction"); \ + load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \ + } + +#define INSN2(NAME, sz, op, o0) /* Two registers */ \ + void NAME(Register Rt, Register Rn) { \ + load_store_exclusive(dummy_reg, Rt, dummy_reg, \ + Rn, sz, op, o0); \ + } + +#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ + void NAME(Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rt1 != Rt2, "unpredictable instruction"); \ + load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \ + } + + // bytes + INSN3(stxrb, byte, 0b000, 0); + INSN3(stlxrb, byte, 0b000, 1); + INSN2(ldxrb, byte, 0b010, 0); + INSN2(ldaxrb, byte, 0b010, 1); + INSN2(stlrb, byte, 0b100, 1); + INSN2(ldarb, byte, 0b110, 1); + + // halfwords + INSN3(stxrh, halfword, 0b000, 0); + INSN3(stlxrh, halfword, 0b000, 1); + INSN2(ldxrh, halfword, 0b010, 0); + INSN2(ldaxrh, halfword, 0b010, 1); + INSN2(stlrh, halfword, 0b100, 1); + INSN2(ldarh, halfword, 0b110, 1); + + // words + INSN3(stxrw, word, 0b000, 0); + INSN3(stlxrw, word, 0b000, 1); + INSN4(stxpw, word, 0b001, 0); + INSN4(stlxpw, word, 0b001, 1); + INSN2(ldxrw, word, 0b010, 0); + INSN2(ldaxrw, word, 0b010, 1); + INSN_FOO(ldxpw, word, 0b011, 0); + INSN_FOO(ldaxpw, word, 0b011, 1); + INSN2(stlrw, word, 0b100, 1); + INSN2(ldarw, word, 0b110, 1); + + // xwords + INSN3(stxr, xword, 0b000, 0); + INSN3(stlxr, xword, 0b000, 1); + INSN4(stxp, xword, 0b001, 0); + INSN4(stlxp, xword, 0b001, 1); + INSN2(ldxr, xword, 0b010, 0); + INSN2(ldaxr, xword, 0b010, 1); + INSN_FOO(ldxp, xword, 0b011, 0); + INSN_FOO(ldaxp, xword, 0b011, 1); + INSN2(stlr, xword, 0b100, 1); + INSN2(ldar, xword, 0b110, 1); + +#undef INSN2 +#undef INSN3 +#undef INSN4 +#undef INSN_FOO + + // 8.1 Compare and swap extensions + void lse_cas(Register Rs, Register Rt, Register Rn, + enum operand_size sz, bool a, bool r, bool not_pair) { + starti; + if (! not_pair) { // Pair + assert(sz == word || sz == xword, "invalid size"); + /* The size bit is in bit 30, not 31 */ + sz = (operand_size)(sz == word ? 
0b00:0b01); + } + f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21); + rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0); + } + + // CAS +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, true); \ + } + INSN(cas, false, false) + INSN(casa, true, false) + INSN(casl, false, true) + INSN(casal, true, true) +#undef INSN + + // CASP +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rs1, \ + Register Rt, Register Rt1, Register Rn) { \ + assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \ + Rs->successor() == Rs1 && Rt->successor() == Rt1 && \ + Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, false); \ + } + INSN(casp, false, false) + INSN(caspa, true, false) + INSN(caspl, false, true) + INSN(caspal, true, true) +#undef INSN + + // 8.1 Atomic operations + void lse_atomic(Register Rs, Register Rt, Register Rn, + enum operand_size sz, int op1, int op2, bool a, bool r) { + starti; + f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21); + rf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), rf(Rn, 5), zrf(Rt, 0); + } + +#define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2) \ + void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false); \ + } \ + void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false); \ + } \ + void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true); \ + } \ + void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\ + lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true); \ + } + INSN(ldadd, ldadda, ldaddl, ldaddal, 0, 0b000); + INSN(ldbic, ldbica, ldbicl, ldbical, 0, 0b001); + INSN(ldeor, ldeora, ldeorl, ldeoral, 0, 0b010); + INSN(ldorr, ldorra, ldorrl, ldorral, 0, 0b011); + INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100); + INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101); + INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110); + INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111); + INSN(swp, swpa, swpl, swpal, 1, 0b000); +#undef INSN + + // Load register (literal) +#define INSN(NAME, opc, V) \ + void NAME(Register Rt, address dest) { \ + long offset = (dest - pc()) >> 2; \ + starti; \ + f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ + sf(offset, 23, 5); \ + rf(Rt, 0); \ + } \ + void NAME(Register Rt, address dest, relocInfo::relocType rtype) { \ + InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ + code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \ + NAME(Rt, dest); \ + } \ + void NAME(Register Rt, Label &L) { \ + wrap_label(Rt, L, &Assembler::NAME); \ + } + + INSN(ldrw, 0b00, 0); + INSN(ldr, 0b01, 0); + INSN(ldrsw, 0b10, 0); + +#undef INSN + +#define INSN(NAME, opc, V) \ + void NAME(FloatRegister Rt, address dest) { \ + long offset = (dest - pc()) >> 2; \ + starti; \ + f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ + sf(offset, 23, 5); \ + rf((Register)Rt, 0); \ + } + + INSN(ldrs, 0b00, 1); + INSN(ldrd, 0b01, 1); + INSN(ldrq, 0x10, 1); + +#undef INSN + +#define INSN(NAME, opc, V) \ + void NAME(address dest, prfop op = PLDL1KEEP) { \ + long offset = (dest - pc()) >> 
2; \ + starti; \ + f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \ + sf(offset, 23, 5); \ + f(op, 4, 0); \ + } \ + void NAME(Label &L, prfop op = PLDL1KEEP) { \ + wrap_label(L, op, &Assembler::NAME); \ + } + + INSN(prfm, 0b11, 0); + +#undef INSN + + // Load/store + void ld_st1(int opc, int p1, int V, int L, + Register Rt1, Register Rt2, Address adr, bool no_allocate) { + starti; + f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22); + zrf(Rt2, 10), zrf(Rt1, 0); + if (no_allocate) { + adr.encode_nontemporal_pair(current); + } else { + adr.encode_pair(current); + } + } + + // Load/store register pair (offset) +#define INSN(NAME, size, p1, V, L, no_allocate) \ + void NAME(Register Rt1, Register Rt2, Address adr) { \ + ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \ + } + + INSN(stpw, 0b00, 0b101, 0, 0, false); + INSN(ldpw, 0b00, 0b101, 0, 1, false); + INSN(ldpsw, 0b01, 0b101, 0, 1, false); + INSN(stp, 0b10, 0b101, 0, 0, false); + INSN(ldp, 0b10, 0b101, 0, 1, false); + + // Load/store no-allocate pair (offset) + INSN(stnpw, 0b00, 0b101, 0, 0, true); + INSN(ldnpw, 0b00, 0b101, 0, 1, true); + INSN(stnp, 0b10, 0b101, 0, 0, true); + INSN(ldnp, 0b10, 0b101, 0, 1, true); + +#undef INSN + +#define INSN(NAME, size, p1, V, L, no_allocate) \ + void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) { \ + ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \ + } + + INSN(stps, 0b00, 0b101, 1, 0, false); + INSN(ldps, 0b00, 0b101, 1, 1, false); + INSN(stpd, 0b01, 0b101, 1, 0, false); + INSN(ldpd, 0b01, 0b101, 1, 1, false); + INSN(stpq, 0b10, 0b101, 1, 0, false); + INSN(ldpq, 0b10, 0b101, 1, 1, false); + +#undef INSN + + // Load/store register (all modes) + void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) { + starti; + + f(V, 26); // general reg? + zrf(Rt, 0); + + // Encoding for literal loads is done here (rather than pushed + // down into Address::encode) because the encoding of this + // instruction is too different from all of the other forms to + // make it worth sharing. + if (adr.getMode() == Address::literal) { + assert(size == 0b10 || size == 0b11, "bad operand size in ldr"); + assert(op == 0b01, "literal form can only be used with loads"); + f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24); + long offset = (adr.target() - pc()) >> 2; + sf(offset, 23, 5); + code_section()->relocate(pc(), adr.rspec()); + return; + } + + f(size, 31, 30); + f(op, 23, 22); // str + adr.encode(current); + } + +#define INSN(NAME, size, op) \ + void NAME(Register Rt, const Address &adr) { \ + ld_st2(Rt, adr, size, op); \ + } \ + + INSN(str, 0b11, 0b00); + INSN(strw, 0b10, 0b00); + INSN(strb, 0b00, 0b00); + INSN(strh, 0b01, 0b00); + + INSN(ldr, 0b11, 0b01); + INSN(ldrw, 0b10, 0b01); + INSN(ldrb, 0b00, 0b01); + INSN(ldrh, 0b01, 0b01); + + INSN(ldrsb, 0b00, 0b10); + INSN(ldrsbw, 0b00, 0b11); + INSN(ldrsh, 0b01, 0b10); + INSN(ldrshw, 0b01, 0b11); + INSN(ldrsw, 0b10, 0b10); + +#undef INSN + +#define INSN(NAME, size, op) \ + void NAME(const Address &adr, prfop pfop = PLDL1KEEP) { \ + ld_st2((Register)pfop, adr, size, op); \ + } + + INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with + // writeback modes, but the assembler + // doesn't enfore that. 
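+                                  // (Usage sketch, not from the original change:
+                                  // prfm(Address(r0, 64)) emits a PLDL1KEEP
+                                  // prefetch of the data at [r0, #64].)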
+ +#undef INSN + +#define INSN(NAME, size, op) \ + void NAME(FloatRegister Rt, const Address &adr) { \ + ld_st2((Register)Rt, adr, size, op, 1); \ + } + + INSN(strd, 0b11, 0b00); + INSN(strs, 0b10, 0b00); + INSN(ldrd, 0b11, 0b01); + INSN(ldrs, 0b10, 0b01); + INSN(strq, 0b00, 0b10); + INSN(ldrq, 0x00, 0b11); + +#undef INSN + + enum shift_kind { LSL, LSR, ASR, ROR }; + + void op_shifted_reg(unsigned decode, + enum shift_kind kind, unsigned shift, + unsigned size, unsigned op) { + f(size, 31); + f(op, 30, 29); + f(decode, 28, 24); + f(shift, 15, 10); + f(kind, 23, 22); + } + + // Logical (shifted register) +#define INSN(NAME, size, op, N) \ + void NAME(Register Rd, Register Rn, Register Rm, \ + enum shift_kind kind = LSL, unsigned shift = 0) { \ + starti; \ + f(N, 21); \ + zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ + op_shifted_reg(0b01010, kind, shift, size, op); \ + } + + INSN(andr, 1, 0b00, 0); + INSN(orr, 1, 0b01, 0); + INSN(eor, 1, 0b10, 0); + INSN(ands, 1, 0b11, 0); + INSN(andw, 0, 0b00, 0); + INSN(orrw, 0, 0b01, 0); + INSN(eorw, 0, 0b10, 0); + INSN(andsw, 0, 0b11, 0); + + INSN(bic, 1, 0b00, 1); + INSN(orn, 1, 0b01, 1); + INSN(eon, 1, 0b10, 1); + INSN(bics, 1, 0b11, 1); + INSN(bicw, 0, 0b00, 1); + INSN(ornw, 0, 0b01, 1); + INSN(eonw, 0, 0b10, 1); + INSN(bicsw, 0, 0b11, 1); + +#undef INSN + + // Add/subtract (shifted register) +#define INSN(NAME, size, op) \ + void NAME(Register Rd, Register Rn, Register Rm, \ + enum shift_kind kind, unsigned shift = 0) { \ + starti; \ + f(0, 21); \ + assert_cond(kind != ROR); \ + zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16); \ + op_shifted_reg(0b01011, kind, shift, size, op); \ + } + + INSN(add, 1, 0b000); + INSN(sub, 1, 0b10); + INSN(addw, 0, 0b000); + INSN(subw, 0, 0b10); + + INSN(adds, 1, 0b001); + INSN(subs, 1, 0b11); + INSN(addsw, 0, 0b001); + INSN(subsw, 0, 0b11); + +#undef INSN + + // Add/subtract (extended register) +#define INSN(NAME, op) \ + void NAME(Register Rd, Register Rn, Register Rm, \ + ext::operation option, int amount = 0) { \ + starti; \ + zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0); \ + add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ + } + + void add_sub_extended_reg(unsigned op, unsigned decode, + Register Rd, Register Rn, Register Rm, + unsigned opt, ext::operation option, unsigned imm) { + guarantee(imm <= 4, "shift amount must be < 4"); + f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21); + f(option, 15, 13), f(imm, 12, 10); + } + + INSN(addw, 0b000); + INSN(subw, 0b010); + INSN(add, 0b100); + INSN(sub, 0b110); + +#undef INSN + +#define INSN(NAME, op) \ + void NAME(Register Rd, Register Rn, Register Rm, \ + ext::operation option, int amount = 0) { \ + starti; \ + zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0); \ + add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \ + } + + INSN(addsw, 0b001); + INSN(subsw, 0b011); + INSN(adds, 0b101); + INSN(subs, 0b111); + +#undef INSN + + // Aliases for short forms of add and sub +#define INSN(NAME) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + if (Rd == sp || Rn == sp) \ + NAME(Rd, Rn, Rm, ext::uxtx); \ + else \ + NAME(Rd, Rn, Rm, LSL); \ + } + + INSN(addw); + INSN(subw); + INSN(add); + INSN(sub); + + INSN(addsw); + INSN(subsw); + INSN(adds); + INSN(subs); + +#undef INSN + + // Add/subtract (with carry) + void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) { + starti; + f(op, 31, 29); + f(0b11010000, 28, 21); + f(0b000000, 15, 10); + zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); + } + + #define INSN(NAME, op) \ + void NAME(Register Rd, Register Rn, 
Register Rm) { \ + add_sub_carry(op, Rd, Rn, Rm); \ + } + + INSN(adcw, 0b000); + INSN(adcsw, 0b001); + INSN(sbcw, 0b010); + INSN(sbcsw, 0b011); + INSN(adc, 0b100); + INSN(adcs, 0b101); + INSN(sbc,0b110); + INSN(sbcs, 0b111); + +#undef INSN + + // Conditional compare (both kinds) + void conditional_compare(unsigned op, int o2, int o3, + Register Rn, unsigned imm5, unsigned nzcv, + unsigned cond) { + f(op, 31, 29); + f(0b11010010, 28, 21); + f(cond, 15, 12); + f(o2, 10); + f(o3, 4); + f(nzcv, 3, 0); + f(imm5, 20, 16), rf(Rn, 5); + } + +#define INSN(NAME, op) \ + void NAME(Register Rn, Register Rm, int imm, Condition cond) { \ + starti; \ + f(0, 11); \ + conditional_compare(op, 0, 0, Rn, (uintptr_t)Rm, imm, cond); \ + } \ + \ + void NAME(Register Rn, int imm5, int imm, Condition cond) { \ + starti; \ + f(1, 11); \ + conditional_compare(op, 0, 0, Rn, imm5, imm, cond); \ + } + + INSN(ccmnw, 0b001); + INSN(ccmpw, 0b011); + INSN(ccmn, 0b101); + INSN(ccmp, 0b111); + +#undef INSN + + // Conditional select + void conditional_select(unsigned op, unsigned op2, + Register Rd, Register Rn, Register Rm, + unsigned cond) { + starti; + f(op, 31, 29); + f(0b11010100, 28, 21); + f(cond, 15, 12); + f(op2, 11, 10); + zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0); + } + +#define INSN(NAME, op, op2) \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \ + conditional_select(op, op2, Rd, Rn, Rm, cond); \ + } + + INSN(cselw, 0b000, 0b00); + INSN(csincw, 0b000, 0b01); + INSN(csinvw, 0b010, 0b00); + INSN(csnegw, 0b010, 0b01); + INSN(csel, 0b100, 0b00); + INSN(csinc, 0b100, 0b01); + INSN(csinv, 0b110, 0b00); + INSN(csneg, 0b110, 0b01); + +#undef INSN + + // Data processing + void data_processing(unsigned op29, unsigned opcode, + Register Rd, Register Rn) { + f(op29, 31, 29), f(0b11010110, 28, 21); + f(opcode, 15, 10); + rf(Rn, 5), rf(Rd, 0); + } + + // (1 source) +#define INSN(NAME, op29, opcode2, opcode) \ + void NAME(Register Rd, Register Rn) { \ + starti; \ + f(opcode2, 20, 16); \ + data_processing(op29, opcode, Rd, Rn); \ + } + + INSN(rbitw, 0b010, 0b00000, 0b00000); + INSN(rev16w, 0b010, 0b00000, 0b00001); + INSN(revw, 0b010, 0b00000, 0b00010); + INSN(clzw, 0b010, 0b00000, 0b00100); + INSN(clsw, 0b010, 0b00000, 0b00101); + + INSN(rbit, 0b110, 0b00000, 0b00000); + INSN(rev16, 0b110, 0b00000, 0b00001); + INSN(rev32, 0b110, 0b00000, 0b00010); + INSN(rev, 0b110, 0b00000, 0b00011); + INSN(clz, 0b110, 0b00000, 0b00100); + INSN(cls, 0b110, 0b00000, 0b00101); + +#undef INSN + + // (2 sources) +#define INSN(NAME, op29, opcode) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + starti; \ + rf(Rm, 16); \ + data_processing(op29, opcode, Rd, Rn); \ + } + + INSN(udivw, 0b000, 0b000010); + INSN(sdivw, 0b000, 0b000011); + INSN(lslvw, 0b000, 0b001000); + INSN(lsrvw, 0b000, 0b001001); + INSN(asrvw, 0b000, 0b001010); + INSN(rorvw, 0b000, 0b001011); + + INSN(udiv, 0b100, 0b000010); + INSN(sdiv, 0b100, 0b000011); + INSN(lslv, 0b100, 0b001000); + INSN(lsrv, 0b100, 0b001001); + INSN(asrv, 0b100, 0b001010); + INSN(rorv, 0b100, 0b001011); + +#undef INSN + + // (3 sources) + void data_processing(unsigned op54, unsigned op31, unsigned o0, + Register Rd, Register Rn, Register Rm, + Register Ra) { + starti; + f(op54, 31, 29), f(0b11011, 28, 24); + f(op31, 23, 21), f(o0, 15); + zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0); + } + +#define INSN(NAME, op54, op31, o0) \ + void NAME(Register Rd, Register Rn, Register Rm, Register Ra) { \ + data_processing(op54, op31, o0, Rd, Rn, Rm, Ra); \ + } + + INSN(maddw, 0b000, 0b000, 0); 
+ INSN(msubw, 0b000, 0b000, 1); + INSN(madd, 0b100, 0b000, 0); + INSN(msub, 0b100, 0b000, 1); + INSN(smaddl, 0b100, 0b001, 0); + INSN(smsubl, 0b100, 0b001, 1); + INSN(umaddl, 0b100, 0b101, 0); + INSN(umsubl, 0b100, 0b101, 1); + +#undef INSN + +#define INSN(NAME, op54, op31, o0) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31); \ + } + + INSN(smulh, 0b100, 0b010, 0); + INSN(umulh, 0b100, 0b110, 0); + +#undef INSN + + // Floating-point data-processing (1 source) + void data_processing(unsigned op31, unsigned type, unsigned opcode, + FloatRegister Vd, FloatRegister Vn) { + starti; + f(op31, 31, 29); + f(0b11110, 28, 24); + f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10); + rf(Vn, 5), rf(Vd, 0); + } + +#define INSN(NAME, op31, type, opcode) \ + void NAME(FloatRegister Vd, FloatRegister Vn) { \ + data_processing(op31, type, opcode, Vd, Vn); \ + } + +private: + INSN(i_fmovs, 0b000, 0b00, 0b000000); +public: + INSN(fabss, 0b000, 0b00, 0b000001); + INSN(fnegs, 0b000, 0b00, 0b000010); + INSN(fsqrts, 0b000, 0b00, 0b000011); + INSN(fcvts, 0b000, 0b00, 0b000101); // Single-precision to double-precision + +private: + INSN(i_fmovd, 0b000, 0b01, 0b000000); +public: + INSN(fabsd, 0b000, 0b01, 0b000001); + INSN(fnegd, 0b000, 0b01, 0b000010); + INSN(fsqrtd, 0b000, 0b01, 0b000011); + INSN(fcvtd, 0b000, 0b01, 0b000100); // Double-precision to single-precision + + void fmovd(FloatRegister Vd, FloatRegister Vn) { + assert(Vd != Vn, "should be"); + i_fmovd(Vd, Vn); + } + + void fmovs(FloatRegister Vd, FloatRegister Vn) { + assert(Vd != Vn, "should be"); + i_fmovs(Vd, Vn); + } + +#undef INSN + + // Floating-point data-processing (2 source) + void data_processing(unsigned op31, unsigned type, unsigned opcode, + FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { + starti; + f(op31, 31, 29); + f(0b11110, 28, 24); + f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10); + rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); + } + +#define INSN(NAME, op31, type, opcode) \ + void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \ + data_processing(op31, type, opcode, Vd, Vn, Vm); \ + } + + INSN(fmuls, 0b000, 0b00, 0b0000); + INSN(fdivs, 0b000, 0b00, 0b0001); + INSN(fadds, 0b000, 0b00, 0b0010); + INSN(fsubs, 0b000, 0b00, 0b0011); + INSN(fnmuls, 0b000, 0b00, 0b1000); + + INSN(fmuld, 0b000, 0b01, 0b0000); + INSN(fdivd, 0b000, 0b01, 0b0001); + INSN(faddd, 0b000, 0b01, 0b0010); + INSN(fsubd, 0b000, 0b01, 0b0011); + INSN(fnmuld, 0b000, 0b01, 0b1000); + +#undef INSN + + // Floating-point data-processing (3 source) + void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0, + FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, + FloatRegister Va) { + starti; + f(op31, 31, 29); + f(0b11111, 28, 24); + f(type, 23, 22), f(o1, 21), f(o0, 15); + rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); + } + +#define INSN(NAME, op31, type, o1, o0) \ + void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, \ + FloatRegister Va) { \ + data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va); \ + } + + INSN(fmadds, 0b000, 0b00, 0, 0); + INSN(fmsubs, 0b000, 0b00, 0, 1); + INSN(fnmadds, 0b000, 0b00, 1, 0); + INSN(fnmsubs, 0b000, 0b00, 1, 1); + + INSN(fmaddd, 0b000, 0b01, 0, 0); + INSN(fmsubd, 0b000, 0b01, 0, 1); + INSN(fnmaddd, 0b000, 0b01, 1, 0); + INSN(fnmsub, 0b000, 0b01, 1, 1); + +#undef INSN + + // Floating-point conditional select + void fp_conditional_select(unsigned op31, unsigned type, + unsigned op1, unsigned op2, + 
Condition cond, FloatRegister Vd, + FloatRegister Vn, FloatRegister Vm) { + starti; + f(op31, 31, 29); + f(0b11110, 28, 24); + f(type, 23, 22); + f(op1, 21, 21); + f(op2, 11, 10); + f(cond, 15, 12); + rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); + } + +#define INSN(NAME, op31, type, op1, op2) \ + void NAME(FloatRegister Vd, FloatRegister Vn, \ + FloatRegister Vm, Condition cond) { \ + fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm); \ + } + + INSN(fcsels, 0b000, 0b00, 0b1, 0b11); + INSN(fcseld, 0b000, 0b01, 0b1, 0b11); + +#undef INSN + + // Floating-point<->integer conversions + void float_int_convert(unsigned op31, unsigned type, + unsigned rmode, unsigned opcode, + Register Rd, Register Rn) { + starti; + f(op31, 31, 29); + f(0b11110, 28, 24); + f(type, 23, 22), f(1, 21), f(rmode, 20, 19); + f(opcode, 18, 16), f(0b000000, 15, 10); + zrf(Rn, 5), zrf(Rd, 0); + } + +#define INSN(NAME, op31, type, rmode, opcode) \ + void NAME(Register Rd, FloatRegister Vn) { \ + float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn); \ + } + + INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000); + INSN(fcvtzs, 0b100, 0b00, 0b11, 0b000); + INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000); + INSN(fcvtzd, 0b100, 0b01, 0b11, 0b000); + + INSN(fmovs, 0b000, 0b00, 0b00, 0b110); + INSN(fmovd, 0b100, 0b01, 0b00, 0b110); + + // INSN(fmovhid, 0b100, 0b10, 0b01, 0b110); + +#undef INSN + +#define INSN(NAME, op31, type, rmode, opcode) \ + void NAME(FloatRegister Vd, Register Rn) { \ + float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn); \ + } + + INSN(fmovs, 0b000, 0b00, 0b00, 0b111); + INSN(fmovd, 0b100, 0b01, 0b00, 0b111); + + INSN(scvtfws, 0b000, 0b00, 0b00, 0b010); + INSN(scvtfs, 0b100, 0b00, 0b00, 0b010); + INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010); + INSN(scvtfd, 0b100, 0b01, 0b00, 0b010); + + // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111); + +#undef INSN + + // Floating-point compare + void float_compare(unsigned op31, unsigned type, + unsigned op, unsigned op2, + FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) { + starti; + f(op31, 31, 29); + f(0b11110, 28, 24); + f(type, 23, 22), f(1, 21); + f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0); + rf(Vn, 5), rf(Vm, 16); + } + + +#define INSN(NAME, op31, type, op, op2) \ + void NAME(FloatRegister Vn, FloatRegister Vm) { \ + float_compare(op31, type, op, op2, Vn, Vm); \ + } + +#define INSN1(NAME, op31, type, op, op2) \ + void NAME(FloatRegister Vn, double d) { \ + assert_cond(d == 0.0); \ + float_compare(op31, type, op, op2, Vn); \ + } + + INSN(fcmps, 0b000, 0b00, 0b00, 0b00000); + INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000); + // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000); + // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000); + + INSN(fcmpd, 0b000, 0b01, 0b00, 0b00000); + INSN1(fcmpd, 0b000, 0b01, 0b00, 0b01000); + // INSN(fcmped, 0b000, 0b01, 0b00, 0b10000); + // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000); + +#undef INSN +#undef INSN1 + + // Floating-point Move (immediate) +private: + unsigned pack(double value); + + void fmov_imm(FloatRegister Vn, double value, unsigned size) { + starti; + f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21); + f(pack(value), 20, 13), f(0b10000000, 12, 5); + rf(Vn, 0); + } + +public: + + void fmovs(FloatRegister Vn, double value) { + if (value) + fmov_imm(Vn, value, 0b00); + else + fmovs(Vn, zr); + } + void fmovd(FloatRegister Vn, double value) { + if (value) + fmov_imm(Vn, value, 0b01); + else + fmovd(Vn, zr); + } + +/* SIMD extensions + * + * We just use FloatRegister in the following. They are exactly the same + * as SIMD registers. 
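+ *
+ * For example, an operand declared with arrangement T4S views the 128-bit
+ * register as four 32-bit lanes, while T8B views the low 64 bits as eight
+ * byte lanes.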
+ */ + public: + + enum SIMD_Arrangement { + T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D + }; + + enum SIMD_RegVariant { + B, H, S, D, Q + }; + +#define INSN(NAME, op) \ + void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \ + ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \ + } \ + + INSN(ldr, 1); + INSN(str, 0); + +#undef INSN + + private: + + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, + int imm, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, + Register Xm, int op1, int op2) { + starti; + f(0,31), f((int)T & 1, 30); + f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12); + f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); + } + + void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) { + switch (a.getMode()) { + case Address::base_plus_offset: + guarantee(a.offset() == 0, "no offset allowed here"); + ld_st(Vt, T, a.base(), op1, op2); + break; + case Address::post: + ld_st(Vt, T, a.base(), a.offset(), op1, op2); + break; + case Address::base_plus_offset_reg: + ld_st(Vt, T, a.base(), a.index(), op1, op2); + break; + default: + ShouldNotReachHere(); + } + } + + public: + +#define INSN1(NAME, op1, op2) \ + void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) { \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN2(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == Vt2, "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN3(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ + SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ + "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + +#define INSN4(NAME, op1, op2) \ + void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ + FloatRegister Vt4, SIMD_Arrangement T, const Address &a) { \ + assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ + Vt3->successor() == Vt4, "Registers must be ordered"); \ + ld_st(Vt, T, a, op1, op2); \ + } + + INSN1(ld1, 0b001100010, 0b0111); + INSN2(ld1, 0b001100010, 0b1010); + INSN3(ld1, 0b001100010, 0b0110); + INSN4(ld1, 0b001100010, 0b0010); + + INSN2(ld2, 0b001100010, 0b1000); + INSN3(ld3, 0b001100010, 0b0100); + INSN4(ld4, 0b001100010, 0b0000); + + INSN1(st1, 0b001100000, 0b0111); + INSN2(st1, 0b001100000, 0b1010); + INSN3(st1, 0b001100000, 0b0110); + INSN4(st1, 0b001100000, 0b0010); + + INSN2(st2, 0b001100000, 0b1000); + INSN3(st3, 0b001100000, 0b0100); + INSN4(st4, 0b001100000, 0b0000); + + INSN1(ld1r, 0b001101010, 0b1100); + INSN2(ld2r, 0b001101011, 0b1100); + INSN3(ld3r, 0b001101010, 0b1110); + INSN4(ld4r, 0b001101011, 0b1110); + +#undef INSN1 +#undef INSN2 +#undef INSN3 +#undef INSN4 + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T8B || T == T16B, "must be T8B or T16B"); \ + f(0, 31), f((int)T & 1, 30), f(opc, 29, 21); \ + rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + 
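+  // Three-operand SIMD bitwise operations; only T8B and T16B are accepted
+  // by the assert above. Illustrative use: eor(v0, T16B, v0, v1) XORs the
+  // full 128 bits of v0 with v1 (T8B would touch only the low 64 bits).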
INSN(eor, 0b101110001); + INSN(orr, 0b001110101); + INSN(andr, 0b001110001); + INSN(bic, 0b001110011); + INSN(bif, 0b101110111); + INSN(bit, 0b101110101); + INSN(bsl, 0b101110011); + INSN(orn, 0b001110111); + +#undef INSN + +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ + f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10); \ + rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(addv, 0, 0b100001); + INSN(subv, 1, 0b100001); + INSN(mulv, 0, 0b100111); + INSN(mlav, 0, 0b100101); + INSN(mlsv, 1, 0b100101); + INSN(sshl, 0, 0b010001); + INSN(ushl, 1, 0b010001); + +#undef INSN + +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ + f((int)T >> 1, 23, 22), f(opc2, 21, 10); \ + rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(absr, 0, 0b100000101110); + INSN(negr, 1, 0b100000101110); + INSN(notr, 1, 0b100000010110); + INSN(addv, 0, 0b110001101110); + INSN(cls, 0, 0b100000010010); + INSN(clz, 1, 0b100000010010); + INSN(cnt, 0, 0b100000010110); + +#undef INSN + +#define INSN(NAME, op0, cmode0) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \ + unsigned cmode = cmode0; \ + unsigned op = op0; \ + starti; \ + assert(lsl == 0 || \ + ((T == T4H || T == T8H) && lsl == 8) || \ + ((T == T2S || T == T4S) && ((lsl >> 3) < 4)), "invalid shift"); \ + cmode |= lsl >> 2; \ + if (T == T4H || T == T8H) cmode |= 0b1000; \ + if (!(T == T4H || T == T8H || T == T2S || T == T4S)) { \ + assert(op == 0 && cmode0 == 0, "must be MOVI"); \ + cmode = 0b1110; \ + if (T == T1D || T == T2D) op = 1; \ + } \ + f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \ + f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \ + rf(Vd, 0); \ + } + + INSN(movi, 0, 0); + INSN(orri, 0, 1); + INSN(mvni, 1, 0); + INSN(bici, 1, 1); + +#undef INSN + +#define INSN(NAME, op1, op2, op3) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ + f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \ + f(T==T2D ? 
1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(fadd, 0, 0, 0b110101); + INSN(fdiv, 1, 0, 0b111111); + INSN(fmul, 1, 0, 0b110111); + INSN(fsub, 0, 1, 0b110101); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T4S, "arrangement must be T4S"); \ + f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(sha1c, 0b000000); + INSN(sha1m, 0b001000); + INSN(sha1p, 0b000100); + INSN(sha1su0, 0b001100); + INSN(sha256h2, 0b010100); + INSN(sha256h, 0b010000); + INSN(sha256su1, 0b011000); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ + starti; \ + assert(T == T4S, "arrangement must be T4S"); \ + f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(sha1h, 0b000010); + INSN(sha1su1, 0b000110); + INSN(sha256su0, 0b001010); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, FloatRegister Vn) { \ + starti; \ + f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(aese, 0b0100111000101000010010); + INSN(aesd, 0b0100111000101000010110); + INSN(aesmc, 0b0100111000101000011010); + INSN(aesimc, 0b0100111000101000011110); + +#undef INSN + + void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) { + starti; + assert(T != Q, "invalid register variant"); + f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15); + f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); + } + + void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { + starti; + f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); + f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10); + rf(Vn, 5), rf(Rd, 0); + } + +#define INSN(NAME, opc, opc2) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ + starti; \ + /* The encodings for the immh:immb fields (bits 22:16) are \ + * 0001 xxx 8B/16B, shift = xxx \ + * 001x xxx 4H/8H, shift = xxxx \ + * 01xx xxx 2S/4S, shift = xxxxx \ + * 1xxx xxx 1D/2D, shift = xxxxxx (1D is RESERVED) \ + */ \ + assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \ + f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \ + f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(shl, 0, 0b010101); + INSN(sshr, 0, 0b000001); + INSN(ushr, 1, 0b000001); + +#undef INSN + + void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + starti; + /* The encodings for the immh:immb fields (bits 22:16) are + * 0001 xxx 8H, 8B/16b shift = xxx + * 001x xxx 4S, 4H/8H shift = xxxx + * 01xx xxx 2D, 2S/4S shift = xxxxx + * 1xxx xxx RESERVED + */ + assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement"); + assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value"); + f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16); + f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0); + } + void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + ushll(Vd, Ta, Vn, Tb, shift); + } + + void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){ + starti; + f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21); + rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0); + } + void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister 
Vm, SIMD_Arrangement T){ + uzp1(Vd, Vn, Vm, T, 1); + } + + // Move from general purpose register + // mov Vd.T[index], Rn + void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) { + starti; + f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); + f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0); + } + + // Move to general purpose register + // mov Rd, Vn.T[index] + void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { + starti; + f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21); + f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); + f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0); + } + + // We do not handle the 1Q arrangement. + void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + starti; + assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier"); + f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10); + rf(Vn, 5), rf(Vd, 0); + } + void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + pmull(Vd, Ta, Vn, Vm, Tb); + } + + void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) { + starti; + int size_b = (int)Tb >> 1; + int size_a = (int)Ta >> 1; + assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier"); + f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22); + f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0); + } + + void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) + { + starti; + assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H"); + f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24); + f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10); + rf(Vn, 5), rf(Vd, 0); + } + + void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs) + { + starti; + assert(T != T1D, "reserved encoding"); + f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21); + f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), rf(Xs, 5), rf(Vd, 0); + } + + void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0) + { + starti; + assert(T != T1D, "reserved encoding"); + f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21); + f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); + f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0); + } + + // CRC32 instructions +#define INSN(NAME, sf, sz) \ + void NAME(Register Rd, Register Rn, Register Rm) { \ + starti; \ + f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \ + rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \ + } + + INSN(crc32b, 0, 0b00); + INSN(crc32h, 0, 0b01); + INSN(crc32w, 0, 0b10); + INSN(crc32x, 1, 0b11); + +#undef INSN + + Assembler(CodeBuffer* code) : AbstractAssembler(code) { + } + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + ShouldNotCallThis(); + return RegisterOrConstant(); + } + + // Stack overflow checking + virtual void bang_stack_with_offset(int offset); + + static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); + static bool operand_valid_for_add_sub_immediate(long imm); + static bool operand_valid_for_float_immediate(double imm); + + void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); + void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); +}; + +inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a, + Assembler::Membar_mask_bits b) { + return 
Assembler::Membar_mask_bits(unsigned(a)|unsigned(b)); +} + +Instruction_aarch64::~Instruction_aarch64() { + assem->emit(); +} + +#undef starti + +// Invert a condition +inline const Assembler::Condition operator~(const Assembler::Condition cond) { + return Assembler::Condition(int(cond) ^ 1); +} + +class BiasedLockingCounters; + +extern "C" void das(uint64_t start, int len); + +#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/assembler_aarch64.inline.hpp 2021-01-25 19:31:26.011363748 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP +#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.cpp 2021-01-25 19:31:26.447368330 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "interpreter/bytecodeInterpreter.hpp" +#include "interpreter/bytecodeInterpreter.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef TARGET_ARCH_MODEL_x86_32 +# include "interp_masm_x86_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_x86_64 +# include "interp_masm_x86_64.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_aarch64 +# include "interp_masm_aarch64.hpp" +#endif + +#ifdef CC_INTERP + +#endif // CC_INTERP (all) --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.hpp 2021-01-25 19:31:26.878372860 +0000 @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP +#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP + +// Platform specific for C++ based Interpreter + +private: + + interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? */ + address _result_handler; /* temp for saving native result handler */ + intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ + + address _extra_junk1; /* temp to save on recompiles */ + address _extra_junk2; /* temp to save on recompiles */ + address _extra_junk3; /* temp to save on recompiles */ + // address dummy_for_native2; /* a native frame result handler would be here... */ + // address dummy_for_native1; /* native result type stored here in a interpreter native frame */ + address _extra_junk4; /* temp to save on recompiles */ + address _extra_junk5; /* temp to save on recompiles */ + address _extra_junk6; /* temp to save on recompiles */ +public: + // we have an interpreter frame... +inline intptr_t* sender_sp() { + return _sender_sp; +} + +// The interpreter always has the frame anchor fully setup so we don't +// have to do anything going to vm from the interpreter. On return +// we do have to clear the flags in case they we're modified to +// maintain the stack walking invariants. 
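+// Both macros below can therefore be left empty on this port; the
+// definitions that follow are deliberately no-ops.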
+// +#define SET_LAST_JAVA_FRAME() + +#define RESET_LAST_JAVA_FRAME() + +/* + * Macros for accessing the stack. + */ +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation + +#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) ((jint)(locals[-(offset)])) +#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) +#define LOCALS_OBJECT(offset) (cast_to_oop(locals[-(offset)])) +#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) +#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ + ((VMJavaVal64*)(addr))->l) + +#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.inline.hpp 2021-01-25 19:31:27.290377191 +0000 @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. 
+ * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP +#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP + +// Inline interpreter functions for IA32 + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + // x86 can do unaligned copies but not 64bits at a time + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" on x86 + +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + // QQQ what about check and throw... + return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + // CVM did this 0x3f mask, is the really needed??? 
QQQ + return ((unsigned long long) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + // Divide by zero... QQQ + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << op2; +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytecodes_aarch64.cpp 2021-01-25 19:31:27.735381868 +0000 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + + +void Bytecodes::pd_initialize() { + // No aarch64 specific initialization +} + + +Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { + // No aarch64 specific bytecodes + return code; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytecodes_aarch64.hpp 2021-01-25 19:31:28.143386156 +0000 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_BYTECODES_AARCH64_HPP +#define CPU_AARCH64_VM_BYTECODES_AARCH64_HPP + +// No aarch64 specific bytecodes + +#endif // CPU_AARCH64_VM_BYTECODES_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/bytes_aarch64.hpp 2021-01-25 19:31:28.566390602 +0000 @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_BYTES_AARCH64_HPP +#define CPU_AARCH64_VM_BYTES_AARCH64_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. 
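+  // AArch64 is little-endian, so Java's big-endian ordering differs and this
+  // returns true; the get_Java_*/put_Java_* accessors below byte-swap.
+  // Illustrative example: for memory bytes 0x11 0x22 0x33 0x44,
+  // get_native_u4(p) yields 0x44332211 while get_Java_u4(p) yields 0x11223344.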
+ static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since x86 CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } + + static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } + static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } + static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since x86 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] + +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "bytes_linux_aarch64.inline.hpp" +#endif + +#endif // CPU_AARCH64_VM_BYTES_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp 2021-01-25 19:31:29.041395594 +0000 @@ -0,0 +1,466 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "nativeInst_aarch64.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch64.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + + +#define __ ce->masm()-> + +float ConversionStub::float_zero = 0.0; +double ConversionStub::double_zero = 0.0; + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +void ConversionStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + // FIXME: Agh, this is so painful + + __ enter(); + __ sub(sp, sp, 2 * wordSize); + __ push(RegSet::range(r0, r29), sp); // integer registers except lr & sp + for (int i = 30; i >= 0; i -= 2) // caller-saved fp registers + if (i < 8 || i > 15) + __ stpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(__ pre(sp, -2 * wordSize))); + + switch(bytecode()) { + case Bytecodes::_f2i: + { + if (v0 != input()->as_float_reg()) + __ fmovs(v0, input()->as_float_reg()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i)); + } + break; + case Bytecodes::_d2i: + { + if (v0 != input()->as_double_reg()) + __ fmovd(v0, input()->as_double_reg()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); + } + break; + case Bytecodes::_f2l: + { + if (v0 != input()->as_float_reg()) + __ fmovs(v0, input()->as_float_reg()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l)); + } + break; + case Bytecodes::_d2l: + { + if (v0 != input()->as_double_reg()) + __ fmovd(v0, input()->as_double_reg()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); + } + break; + default: + ShouldNotReachHere(); + } + + __ str(r0, Address(rfp, -wordSize)); + + for (int i = 0; i < 32; i += 2) + if (i < 8 || i > 15) + __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(__ post(sp, 2 * wordSize))); + __ pop(RegSet::range(r0, r29), sp); + + __ ldr(as_reg(result()), Address(rfp, -wordSize)); + __ leave(); + + __ b(_continuation); +} + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_method->as_register(), 1); + ce->store_parameter(_bci, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, + bool throw_index_out_of_bounds_exception) + : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) + , _index(index) +{ + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ mov(rscratch1, _index->as_register()); + } else { + __ mov(rscratch1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + stub_id = Runtime1::throw_range_check_failed_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL, rscratch2); + 
ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + + + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + + + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mov(r3, _klass_reg->as_register()); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0,"); + __ b(_continuation); +} + + +// Implementation of NewTypeArrayStub + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r19, "length must in r19,"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} + + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r19, "length must in r19,"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +: MonitorAccessStub(obj_reg, lock_reg) +{ + _info = new 
CodeEmitInfo(info); +} + + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ adr(lr, _continuation); + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "AArch64 should not use C1 runtime patching"); +} + + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. + a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ mov(rscratch1, _obj->as_register()); + } + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. 
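+  // The fixed signature below mirrors System.arraycopy(Object, int, Object,
+  // int, int); java_calling_convention then reports which of the five
+  // arguments stay in registers and which must be stored to the stack
+  // before the call.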
+ // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ str (r[i], Address(sp, st_off)); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + Address resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ incrementw(Address(rscratch2)); +#endif + + __ b(_continuation); +} + + +///////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + if (do_load()) { + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); + } + __ cbz(pre_val_reg, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ b(_continuation); +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cbz(new_val_reg, _continuation); + ce->store_parameter(addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ b(_continuation); +} + +#endif // INCLUDE_ALL_GCS +///////////////////////////////////////////////////////////////////////////// + +#undef __ --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_Defs_aarch64.hpp 2021-01-25 19:31:29.489400303 +0000 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP +#define CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// FIXME: There are no callee-saved + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission + + pd_nof_caller_save_cpu_regs_frame_map = 19 - 2, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls + + pd_first_callee_saved_reg = 19 - 2, + pd_last_callee_saved_reg = 26 - 2, + + pd_last_allocatable_cpu_reg = 16, + + pd_nof_cpu_regs_reg_alloc + = pd_last_allocatable_cpu_reg + 1, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 8, // number of registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these + pd_first_cpu_reg = 0, + pd_last_cpu_reg = 16, + pd_first_byte_reg = 0, + pd_last_byte_reg = 16, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg = 8 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg = 15 + pd_first_fpu_reg, +}; + + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// AArch64 where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_AARCH64_VM_C1_DEFS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.cpp 2021-01-25 19:31:29.921404844 +0000 @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FpuStackSim.hpp" +#include "c1/c1_FrameMap.hpp" +#include "utilities/array.hpp" +#include "utilities/ostream.hpp" + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// This class maps the FPU registers to their stack locations; it computes +// the offsets between individual registers and simulates the FPU stack. + +const int EMPTY = -1; + +int FpuStackSim::regs_at(int i) const { + assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds"); + return _regs[i]; +} + +void FpuStackSim::set_regs_at(int i, int val) { + assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds"); + _regs[i] = val; +} + +void FpuStackSim::dec_stack_size() { + _stack_size--; + assert(_stack_size >= 0, "FPU stack underflow"); +} + +void FpuStackSim::inc_stack_size() { + _stack_size++; + assert(_stack_size <= FrameMap::nof_fpu_regs, "FPU stack overflow"); +} + +FpuStackSim::FpuStackSim(Compilation* compilation) + : _compilation(compilation) +{ + _stack_size = 0; + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + set_regs_at(i, EMPTY); + } +} + + +void FpuStackSim::pop() { + if (TraceFPUStack) { tty->print("FPU-pop "); print(); tty->cr(); } + set_regs_at(tos_index(), EMPTY); + dec_stack_size(); +} + +void FpuStackSim::pop(int rnr) { + if (TraceFPUStack) { tty->print("FPU-pop %d", rnr); print(); tty->cr(); } + assert(regs_at(tos_index()) == rnr, "rnr is not on TOS"); + set_regs_at(tos_index(), EMPTY); + dec_stack_size(); +} + + +void FpuStackSim::push(int rnr) { + if (TraceFPUStack) { tty->print("FPU-push %d", rnr); print(); tty->cr(); } + assert(regs_at(stack_size()) == EMPTY, "should be empty"); + set_regs_at(stack_size(), rnr); + inc_stack_size(); +} + + +void FpuStackSim::swap(int offset) { + if (TraceFPUStack) { tty->print("FPU-swap %d", offset); print(); tty->cr(); } + int t = regs_at(tos_index() - offset); + set_regs_at(tos_index() - offset, regs_at(tos_index())); + set_regs_at(tos_index(), t); +} + + +int FpuStackSim::offset_from_tos(int rnr) const { + for (int i = tos_index(); i >= 0; i--) { + if (regs_at(i) == rnr) { + return tos_index() - i; + } + } + assert(false, "FpuStackSim: register not found"); + BAILOUT_("FpuStackSim: register not found", 0); +} + + +int FpuStackSim::get_slot(int tos_offset) const { + return regs_at(tos_index() - tos_offset); +} + +void FpuStackSim::set_slot(int tos_offset, int rnr) { + set_regs_at(tos_index() - tos_offset, rnr); +} + +void FpuStackSim::rename(int old_rnr, int new_rnr) { + if (TraceFPUStack) { tty->print("FPU-rename %d %d", old_rnr, new_rnr); print(); tty->cr(); } + if (old_rnr == new_rnr) + return; + bool found = false; + for (int i = 0; i < stack_size(); i++) { + assert(regs_at(i) != new_rnr, "should not see old occurrences of new_rnr on the stack"); + if (regs_at(i) == old_rnr) { + set_regs_at(i, new_rnr); + found = true; + } + } + assert(found, "should have found at least one instance of 
old_rnr"); +} + + +bool FpuStackSim::contains(int rnr) { + for (int i = 0; i < stack_size(); i++) { + if (regs_at(i) == rnr) { + return true; + } + } + return false; +} + +bool FpuStackSim::is_empty() { +#ifdef ASSERT + if (stack_size() == 0) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + assert(regs_at(i) == EMPTY, "must be empty"); + } + } +#endif + return stack_size() == 0; +} + + +bool FpuStackSim::slot_is_empty(int tos_offset) { + return (regs_at(tos_index() - tos_offset) == EMPTY); +} + + +void FpuStackSim::clear() { + if (TraceFPUStack) { tty->print("FPU-clear"); print(); tty->cr(); } + for (int i = tos_index(); i >= 0; i--) { + set_regs_at(i, EMPTY); + } + _stack_size = 0; +} + + +intArray* FpuStackSim::write_state() { + intArray* res = new intArray(1 + FrameMap::nof_fpu_regs); + (*res)[0] = stack_size(); + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + (*res)[1 + i] = regs_at(i); + } + return res; +} + + +void FpuStackSim::read_state(intArray* fpu_stack_state) { + _stack_size = (*fpu_stack_state)[0]; + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + set_regs_at(i, (*fpu_stack_state)[1 + i]); + } +} + + +#ifndef PRODUCT +void FpuStackSim::print() { + tty->print(" N=%d[", stack_size());\ + for (int i = 0; i < stack_size(); i++) { + int reg = regs_at(i); + if (reg != EMPTY) { + tty->print("%d", reg); + } else { + tty->print("_"); + } + }; + tty->print(" ]"); +} +#endif --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_FpuStackSim_aarch64.hpp 2021-01-25 19:31:30.367409531 +0000 @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
+#define CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
+
+// Simulates the FPU stack and maintains mapping [fpu-register -> stack offset]
+// FPU registers are described as numbers from 0..nof_fpu_regs-1
+
+class Compilation;
+
+class FpuStackSim VALUE_OBJ_CLASS_SPEC {
+ private:
+  Compilation* _compilation;
+  int          _stack_size;
+  int          _regs[FrameMap::nof_fpu_regs];
+
+  int tos_index() const { return _stack_size - 1; }
+
+  int regs_at(int i) const;
+  void set_regs_at(int i, int val);
+  void dec_stack_size();
+  void inc_stack_size();
+
+  // unified bailout support
+  Compilation* compilation() const             { return _compilation; }
+  void         bailout(const char* msg) const  { compilation()->bailout(msg); }
+  bool         bailed_out() const              { return compilation()->bailed_out(); }
+
+ public:
+  FpuStackSim(Compilation* compilation);
+  void pop ();
+  void pop (int rnr);                     // rnr must be on tos
+  void push(int rnr);
+  void swap(int offset);                  // exchange tos with tos + offset
+  int offset_from_tos(int rnr) const;     // return the offset of the topmost instance of rnr from TOS
+  int get_slot(int tos_offset) const;     // return the entry at the given offset from TOS
+  void set_slot(int tos_offset, int rnr); // set the entry at the given offset from TOS
+  void rename(int old_rnr, int new_rnr);  // rename all instances of old_rnr to new_rnr
+  bool contains(int rnr);                 // debugging support only
+  bool is_empty();
+  bool slot_is_empty(int tos_offset);
+  int stack_size() const                  { return _stack_size; }
+  void clear();
+  intArray* write_state();
+  void read_state(intArray* fpu_stack_state);
+
+  void print() PRODUCT_RETURN;
+};
+
+#endif // CPU_AARCH64_VM_C1_FPUSTACKSIM_AARCH64_HPP
--- /dev/null	2021-01-19 17:38:25.908523431 +0000
+++ new/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp	2021-01-25 19:31:30.788413956 +0000
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2013, Red Hat Inc.
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates.
+ * All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
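+    // Each VMReg stack slot is one 32-bit word (VMRegImpl::stack_slot_size == 4),
+    // so the byte offset is (slot index + caller-reserved out_preserve slots) * 4.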
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); +#ifdef _LP64 + assert(reg2 == reg, "must be same register"); + opr = as_long_opr(reg); +#else + opr = as_long_opr(reg2, reg); +#endif // _LP64 + } else if (type == T_OBJECT || type == T_ARRAY) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::r1_opr; +LIR_Opr FrameMap::r2_opr; +LIR_Opr FrameMap::r3_opr; +LIR_Opr FrameMap::r4_opr; +LIR_Opr FrameMap::r5_opr; +LIR_Opr FrameMap::r6_opr; +LIR_Opr FrameMap::r7_opr; +LIR_Opr FrameMap::r8_opr; +LIR_Opr FrameMap::r9_opr; +LIR_Opr FrameMap::r10_opr; +LIR_Opr FrameMap::r11_opr; +LIR_Opr FrameMap::r12_opr; +LIR_Opr FrameMap::r13_opr; +LIR_Opr FrameMap::r14_opr; +LIR_Opr FrameMap::r15_opr; +LIR_Opr FrameMap::r16_opr; +LIR_Opr FrameMap::r17_opr; +LIR_Opr FrameMap::r18_opr; +LIR_Opr FrameMap::r19_opr; +LIR_Opr FrameMap::r20_opr; +LIR_Opr FrameMap::r21_opr; +LIR_Opr FrameMap::r22_opr; +LIR_Opr FrameMap::r23_opr; +LIR_Opr FrameMap::r24_opr; +LIR_Opr FrameMap::r25_opr; +LIR_Opr FrameMap::r26_opr; +LIR_Opr FrameMap::r27_opr; +LIR_Opr FrameMap::r28_opr; +LIR_Opr FrameMap::r29_opr; +LIR_Opr FrameMap::r30_opr; + +LIR_Opr FrameMap::rfp_opr; +LIR_Opr FrameMap::sp_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::r0_oop_opr; +LIR_Opr FrameMap::r1_oop_opr; +LIR_Opr FrameMap::r2_oop_opr; +LIR_Opr FrameMap::r3_oop_opr; +LIR_Opr FrameMap::r4_oop_opr; +LIR_Opr FrameMap::r5_oop_opr; +LIR_Opr FrameMap::r6_oop_opr; +LIR_Opr FrameMap::r7_oop_opr; +LIR_Opr FrameMap::r8_oop_opr; +LIR_Opr FrameMap::r9_oop_opr; +LIR_Opr FrameMap::r10_oop_opr; +LIR_Opr FrameMap::r11_oop_opr; +LIR_Opr FrameMap::r12_oop_opr; +LIR_Opr FrameMap::r13_oop_opr; +LIR_Opr FrameMap::r14_oop_opr; +LIR_Opr FrameMap::r15_oop_opr; +LIR_Opr FrameMap::r16_oop_opr; +LIR_Opr FrameMap::r17_oop_opr; +LIR_Opr FrameMap::r18_oop_opr; +LIR_Opr FrameMap::r19_oop_opr; +LIR_Opr FrameMap::r20_oop_opr; +LIR_Opr FrameMap::r21_oop_opr; +LIR_Opr FrameMap::r22_oop_opr; +LIR_Opr FrameMap::r23_oop_opr; +LIR_Opr FrameMap::r24_oop_opr; +LIR_Opr FrameMap::r25_oop_opr; +LIR_Opr FrameMap::r26_oop_opr; +LIR_Opr FrameMap::r27_oop_opr; +LIR_Opr FrameMap::r28_oop_opr; +LIR_Opr FrameMap::r29_oop_opr; +LIR_Opr FrameMap::r30_oop_opr; + +LIR_Opr FrameMap::rscratch1_opr; +LIR_Opr FrameMap::rscratch2_opr; +LIR_Opr FrameMap::rscratch1_long_opr; +LIR_Opr FrameMap::rscratch2_long_opr; + +LIR_Opr FrameMap::r0_metadata_opr; +LIR_Opr FrameMap::r1_metadata_opr; +LIR_Opr FrameMap::r2_metadata_opr; +LIR_Opr FrameMap::r3_metadata_opr; +LIR_Opr FrameMap::r4_metadata_opr; +LIR_Opr FrameMap::r5_metadata_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + 
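+// Note on the mapping order used in initialize() below: LIR register numbers
+// (rnr) are handed out in the order registers are mapped, so the allocatable
+// set r0-r7 and r10-r18 receives rnr 0..16 (matching pd_last_allocatable_cpu_reg
+// in c1_Defs_aarch64.hpp), r19-r26 become the callee-saved range rnr 17..24,
+// and the platform registers (rheapbase, rthread, rfp, lr, sp) together with
+// rscratch1/rscratch2 (r8/r9) take the remaining numbers, keeping them out of
+// the register allocator's view.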
+//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- + +void FrameMap::initialize() { + assert(!_init_done, "once"); + + int i=0; + map_register(i, r0); r0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r1); r1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r2); r2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r3); r3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r4); r4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r5); r5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r6); r6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r7); r7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r10); r10_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r11); r11_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r12); r12_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r13); r13_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r14); r14_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r16); r16_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r17); r17_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r18); r18_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r19); r19_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r20); r20_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r21); r21_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r22); r22_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r23); r23_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r24); r24_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r25); r25_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r26); r26_opr = LIR_OprFact::single_cpu(i); i++; + + map_register(i, r27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase + map_register(i, r28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread + map_register(i, r29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp + map_register(i, r30); r30_opr = LIR_OprFact::single_cpu(i); i++; // lr + map_register(i, r31_sp); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp + map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++; // rscratch1 + map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++; // rscratch2 + + rscratch1_opr = r8_opr; + rscratch2_opr = r9_opr; + rscratch1_long_opr = LIR_OprFact::double_cpu(r8_opr->cpu_regnr(), r8_opr->cpu_regnr()); + rscratch2_long_opr = LIR_OprFact::double_cpu(r9_opr->cpu_regnr(), r9_opr->cpu_regnr()); + + long0_opr = LIR_OprFact::double_cpu(0, 0); + long1_opr = LIR_OprFact::double_cpu(1, 1); + + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0); + + _caller_save_cpu_regs[0] = r0_opr; + _caller_save_cpu_regs[1] = r1_opr; + _caller_save_cpu_regs[2] = r2_opr; + _caller_save_cpu_regs[3] = r3_opr; + _caller_save_cpu_regs[4] = r4_opr; + _caller_save_cpu_regs[5] = r5_opr; + _caller_save_cpu_regs[6] = r6_opr; + _caller_save_cpu_regs[7] = r7_opr; + // rscratch1, rscratch 2 not included + _caller_save_cpu_regs[8] = r10_opr; + _caller_save_cpu_regs[9] = r11_opr; + _caller_save_cpu_regs[10] = r12_opr; + _caller_save_cpu_regs[11] = r13_opr; + _caller_save_cpu_regs[12] = r14_opr; + _caller_save_cpu_regs[13] = r15_opr; + _caller_save_cpu_regs[14] = r16_opr; + _caller_save_cpu_regs[15] = r17_opr; + _caller_save_cpu_regs[16] = r18_opr; + + for (int i = 0; i < 8; i++) { + 
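+    // only the first eight FP registers (v0-v7) are entered here; the table is
+    // filled again for all nof_caller_save_fpu_regs entries after _init_done is
+    // set below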
_caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } + + _init_done = true; + + r0_oop_opr = as_oop_opr(r0); + r1_oop_opr = as_oop_opr(r1); + r2_oop_opr = as_oop_opr(r2); + r3_oop_opr = as_oop_opr(r3); + r4_oop_opr = as_oop_opr(r4); + r5_oop_opr = as_oop_opr(r5); + r6_oop_opr = as_oop_opr(r6); + r7_oop_opr = as_oop_opr(r7); + r8_oop_opr = as_oop_opr(r8); + r9_oop_opr = as_oop_opr(r9); + r10_oop_opr = as_oop_opr(r10); + r11_oop_opr = as_oop_opr(r11); + r12_oop_opr = as_oop_opr(r12); + r13_oop_opr = as_oop_opr(r13); + r14_oop_opr = as_oop_opr(r14); + r15_oop_opr = as_oop_opr(r15); + r16_oop_opr = as_oop_opr(r16); + r17_oop_opr = as_oop_opr(r17); + r18_oop_opr = as_oop_opr(r18); + r19_oop_opr = as_oop_opr(r19); + r20_oop_opr = as_oop_opr(r20); + r21_oop_opr = as_oop_opr(r21); + r22_oop_opr = as_oop_opr(r22); + r23_oop_opr = as_oop_opr(r23); + r24_oop_opr = as_oop_opr(r24); + r25_oop_opr = as_oop_opr(r25); + r26_oop_opr = as_oop_opr(r26); + r27_oop_opr = as_oop_opr(r27); + r28_oop_opr = as_oop_opr(r28); + r29_oop_opr = as_oop_opr(r29); + r30_oop_opr = as_oop_opr(r30); + + r0_metadata_opr = as_metadata_opr(r0); + r1_metadata_opr = as_metadata_opr(r1); + r2_metadata_opr = as_metadata_opr(r2); + r3_metadata_opr = as_metadata_opr(r3); + r4_metadata_opr = as_metadata_opr(r4); + r5_metadata_opr = as_metadata_opr(r5); + + sp_opr = as_pointer_opr(r31_sp); + rfp_opr = as_pointer_opr(rfp); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (int i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + // for rbp, based address use this: + // return Address(rbp, in_bytes(sp_offset) - (framesize() - 2) * 4); + return Address(sp, in_bytes(sp_offset)); +} + + +// ----------------mapping----------------------- +// all mapping is based on rfp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | <- RFP +// +----------+ +// | ret addr | +// +----------+ +// | args | +// | .........| + + +// For OopMaps, map a local variable or spill index to an VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + + +VMReg FrameMap::fpu_regname (int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises to two single-word OptoReg's. 
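+  // Unlike x87 there is no register stack on AArch64, so the "fpu stack slot"
+  // name is simply the VMReg of SIMD/FP register v<n>.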
+ return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + // assert(rfp == rbp_mh_SP_save, "must be same register"); + return rfp_opr; +} + + +bool FrameMap::validate_frame() { + return true; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_FrameMap_aarch64.hpp 2021-01-25 19:31:31.210418392 +0000 @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP +#define CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP + +// On AArch64 the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
+// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr r0_opr; + static LIR_Opr r1_opr; + static LIR_Opr r2_opr; + static LIR_Opr r3_opr; + static LIR_Opr r4_opr; + static LIR_Opr r5_opr; + static LIR_Opr r6_opr; + static LIR_Opr r7_opr; + static LIR_Opr r8_opr; + static LIR_Opr r9_opr; + static LIR_Opr r10_opr; + static LIR_Opr r11_opr; + static LIR_Opr r12_opr; + static LIR_Opr r13_opr; + static LIR_Opr r14_opr; + static LIR_Opr r15_opr; + static LIR_Opr r16_opr; + static LIR_Opr r17_opr; + static LIR_Opr r18_opr; + static LIR_Opr r19_opr; + static LIR_Opr r20_opr; + static LIR_Opr r21_opr; + static LIR_Opr r22_opr; + static LIR_Opr r23_opr; + static LIR_Opr r24_opr; + static LIR_Opr r25_opr; + static LIR_Opr r26_opr; + static LIR_Opr r27_opr; + static LIR_Opr r28_opr; + static LIR_Opr r29_opr; + static LIR_Opr r30_opr; + static LIR_Opr rfp_opr; + static LIR_Opr sp_opr; + + static LIR_Opr r0_oop_opr; + static LIR_Opr r1_oop_opr; + static LIR_Opr r2_oop_opr; + static LIR_Opr r3_oop_opr; + static LIR_Opr r4_oop_opr; + static LIR_Opr r5_oop_opr; + static LIR_Opr r6_oop_opr; + static LIR_Opr r7_oop_opr; + static LIR_Opr r8_oop_opr; + static LIR_Opr r9_oop_opr; + static LIR_Opr r10_oop_opr; + static LIR_Opr r11_oop_opr; + static LIR_Opr r12_oop_opr; + static LIR_Opr r13_oop_opr; + static LIR_Opr r14_oop_opr; + static LIR_Opr r15_oop_opr; + static LIR_Opr r16_oop_opr; + static LIR_Opr r17_oop_opr; + static LIR_Opr r18_oop_opr; + static LIR_Opr r19_oop_opr; + static LIR_Opr r20_oop_opr; + static LIR_Opr r21_oop_opr; + static LIR_Opr r22_oop_opr; + static LIR_Opr r23_oop_opr; + static LIR_Opr r24_oop_opr; + static LIR_Opr r25_oop_opr; + static LIR_Opr r26_oop_opr; + static LIR_Opr r27_oop_opr; + static LIR_Opr r28_oop_opr; + static LIR_Opr r29_oop_opr; + static LIR_Opr r30_oop_opr; + + static LIR_Opr rscratch1_opr; + static LIR_Opr rscratch2_opr; + static LIR_Opr rscratch1_long_opr; + static LIR_Opr rscratch2_long_opr; + + static LIR_Opr r0_metadata_opr; + static LIR_Opr r1_metadata_opr; + static LIR_Opr r2_metadata_opr; + static LIR_Opr r3_metadata_opr; + static LIR_Opr r4_metadata_opr; + static LIR_Opr r5_metadata_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname (int n); + + static bool is_caller_save_register (LIR_Opr opr) { return true; } + static bool is_caller_save_register (Register r) { return true; } + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + static int last_byte_reg() { return pd_last_byte_reg; } + +#endif // CPU_AARCH64_VM_C1_FRAMEMAP_AARCH64_HPP + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp 2021-01-25 19:31:31.641422922 +0000 @@ -0,0 +1,3195 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. 
+ * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc_interface/collectedHeap.hpp" +#include "memory/barrierSet.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch64.inline.hpp" + + + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions ? +const Register IC_Klass = rscratch2; // where the IC klass is cached +const Register SYNC_header = r0; // synchronization header +const Register SHIFT_count = r0; // where count for shift operations must be + +#define __ _masm-> + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ 
double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::int_constant(jlong n) { + address const_addr = __ long_constant(n); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } +//------------------------------------------- + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { + Register base = addr->base()->as_pointer_register(); + LIR_Opr opr = addr->index(); + if (opr->is_cpu_register()) { + Register index; + if (opr->is_single_cpu()) + index = opr->as_register(); + else + index = opr->as_register_lo(); + assert(addr->disp() == 0, "must be"); + switch(opr->type()) { + case T_INT: + return Address(base, index, Address::sxtw(addr->scale())); + case T_LONG: + return Address(base, index, Address::lsl(addr->scale())); + default: + ShouldNotReachHere(); + } + } else { + intptr_t addr_offset = intptr_t(addr->disp()); + if (Address::offset_ok_for_immed(addr_offset, addr->scale())) + return Address(base, addr_offset, Address::lsl(addr->scale())); + else { + __ mov(tmp, addr_offset); + return Address(base, tmp, Address::lsl(addr->scale())); + } + } + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + return as_Address(addr, rscratch1); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr, rscratch1); // Ouch + // FIXME: This needs to be much more clever. See x86. 
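+  // as_Address_lo simply reuses the scratch-register form of as_Address above,
+  // which materializes any displacement that does not fit an immediate into
+  // rscratch1 - hence the FIXME about handling split lo/hi accesses more cleverly.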
+} + + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // r2: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // r2: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ cbnz(rscratch1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ldr(r19, Address(OSR_buf, slot_offset + 0)); + __ str(r19, frame_map()->address_for_monitor_lock(i)); + __ ldr(r19, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ str(r19, frame_map()->address_for_monitor_object(i)); + } + } +} + + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + __ inline_cache_check(receiver, ic_klass); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + Label dont; + __ br(Assembler::EQ, dont); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. 
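+    // "4 * 4" is four 4-byte AArch64 instructions: only a trivial accessor whose
+    // inline cache check fits in that space skips the CodeEntryAlignment padding,
+    // since anything larger will not share a 64-byte fetch line anyway.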
+ __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ mov(reg, zr); + } else { + __ movoop(reg, o, /*immediate*/true); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + reloc_type = relocInfo::section_word_type; + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + reloc_type = relocInfo::metadata_type; + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + reloc_type = relocInfo::oop_type; + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + reloc_type = relocInfo::oop_type; + break; + default: ShouldNotReachHere(); + } + + __ far_call(RuntimeAddress(target)); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + + // The frame_map records size in slots (32bit word) + + // subtract two words to account for return address and link + return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; +} + + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in r0, and r3 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, false, true, true); + + // check that there is really an exception + __ verify_not_null_oop(r0); + + // search an exception handler (r0: exception oop, r3: throwing pc) + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); + guarantee(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + + +// Emit the code to remove the frame from the stack in the exception +// unwind path. 
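+// The handler below fetches the pending exception oop from thread-local state,
+// clears the thread's exception oop and pc, releases the method monitor for
+// synchronized methods, removes the frame, and finally jumps to
+// Runtime1::unwind_exception_id to continue unwinding in the caller.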
+int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(zr, Address(rthread, JavaThread::exception_oop_offset())); + __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(r0); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mov(r19, r0); // Preserve the exception + } + + // Preform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r0_opr); + stub = new MonitorExitStub(FrameMap::r0_opr, true, 0); + __ unlock_object(r5, r4, r0, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ call_Unimplemented(); +#if 0 + __ movptr(Address(rsp, 0), rax); + __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding()); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); +#endif + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ mov(r0, r19); // Restore the exception + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ adr(lr, pc()); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + + +// This is the fast version of java.lang.String.compare; it has not +// OSR-entry and therefore, we generate a slow version for OSR's +void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) { + __ mov(r2, (address)__FUNCTION__); + __ call_Unimplemented(); +} + + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +// Rather than take a segfault when the polling page is protected, +// explicitly check for a safepoint in progress and if there is one, +// fake a call to the handler as if a segfault had been caught. 
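+// The sequence below saves the caller-visible integer registers, records a fake
+// saved_exception_pc (the "poll" label), calls SharedRuntime::get_poll_stub to
+// obtain the handler address, and then branches there just as the signal path
+// would after a fault on the polling page.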
+void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) { + __ mov(rscratch1, SafepointSynchronize::address_of_state()); + __ ldrb(rscratch1, Address(rscratch1)); + Label nope, poll; + __ cbz(rscratch1, nope); + __ block_comment("safepoint"); + __ enter(); + __ push(0x3, sp); // r0 & r1 + __ push(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1 + __ adr(r0, poll); + __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub)); + __ blr(rscratch1); + __ maybe_isb(); + __ pop(0x3ffffffc, sp); // integer registers except lr & sp & r0 & r1 + __ mov(rscratch1, r0); + __ pop(0x3, sp); // r0 & r1 + __ leave(); + __ br(rscratch1); + address polling_page(os::get_polling_page()); + assert(os::is_poll_address(polling_page), "should be"); + unsigned long off; + __ adrp(rscratch1, Address(polling_page, rtype), off); + __ bind(poll); + if (info) + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map + else + __ code_section()->relocate(pc(), rtype); + __ ldrw(zr, Address(rscratch1, off)); + __ bind(nope); +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + if (UseCompilerSafepoints) { + address polling_page(os::get_polling_page()); + __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type); + } else { + poll_for_safepoint(relocInfo::poll_return_type); + } + __ ret(lr); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + address polling_page(os::get_polling_page()); + if (UseCompilerSafepoints) { + guarantee(info != NULL, "Shouldn't be NULL"); + assert(os::is_poll_address(polling_page), "should be"); + unsigned long off; + __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off); + assert(off == 0, "must be"); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map + __ read_polling_page(rscratch1, relocInfo::poll_type); + } else { + poll_for_safepoint(relocInfo::poll_type, info); + } + + return __ offset(); +} + + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + if (from_reg == r31_sp) + from_reg = sp; + if (to_reg == r31_sp) + to_reg = sp; + __ mov(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ movw(dest->as_register(), c->as_jint()); + break; + } + + case T_ADDRESS: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint()); + break; + } + + case T_LONG: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + } + + case T_OBJECT: { + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + } + + case T_METADATA: { + if (patch_code != 
lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + } + + case T_FLOAT: { + if (__ operand_valid_for_float_immediate(c->as_jfloat())) { + __ fmovs(dest->as_float_reg(), (c->as_jfloat())); + } else { + __ adr(rscratch1, InternalAddress(float_constant(c->as_jfloat()))); + __ ldrs(dest->as_float_reg(), Address(rscratch1)); + } + break; + } + + case T_DOUBLE: { + if (__ operand_valid_for_float_immediate(c->as_jdouble())) { + __ fmovd(dest->as_double_reg(), (c->as_jdouble())); + } else { + __ adr(rscratch1, InternalAddress(double_constant(c->as_jdouble()))); + __ ldrd(dest->as_double_reg(), Address(rscratch1)); + } + break; + } + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + { + if (! c->as_jobject()) + __ str(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + } + break; + case T_ADDRESS: + { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + case T_INT: + case T_FLOAT: + { + Register reg = zr; + if (c->as_jint_bits() == 0) + __ strw(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + __ movw(rscratch1, c->as_jint_bits()); + __ strw(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); + } + } + break; + case T_LONG: + case T_DOUBLE: + { + Register reg = zr; + if (c->as_jlong_bits() == 0) + __ str(zr, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + else { + __ mov(rscratch1, (intptr_t)c->as_jlong_bits()); + __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, const Address &adr); + + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::str; + break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::str; + break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strw; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::strw; + } else { + insn = &Assembler::str; + } + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strh; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strb; + break; + default: + ShouldNotReachHere(); + } + + if (info) add_debug_info_for_null_check_here(info); + (_masm->*insn)(zr, as_Address(to_addr, rscratch1)); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + 
move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + + } else if (dest->is_double_cpu()) { + if (src->type() == T_OBJECT || src->type() == T_ARRAY) { + // Surprising to me but we can see move of a long to t_object + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + + } else if (dest->is_single_fpu()) { + __ fmovs(dest->as_float_reg(), src->as_float_reg()); + + } else if (dest->is_double_fpu()) { + __ fmovd(dest->as_double_reg(), src->as_double_reg()); + + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + if (src->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE) { + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + __ strw(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + } + + } else if (src->is_double_cpu()) { + Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes); + __ str(src->as_register_lo(), dest_addr_LO); + + } else if (src->is_single_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + __ strs(src->as_float_reg(), dest_addr); + + } else if (src->is_double_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); + __ strd(src->as_double_reg(), dest_addr); + + } else { + ShouldNotReachHere(); + } + +} + + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + PatchingStub* patch = NULL; + Register compressed_src = rscratch1; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: { + __ strs(src->as_float_reg(), as_Address(to_addr)); + break; + } + + case T_DOUBLE: { + __ strd(src->as_double_reg(), as_Address(to_addr)); + break; + } + + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ strw(compressed_src, as_Address(to_addr, rscratch2)); + } else { + __ str(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
+ LP64_ONLY(ShouldNotReachHere()); + __ str(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ str(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ strw(src->as_register(), as_Address(to_addr)); + break; + + case T_LONG: { + __ str(src->as_register_lo(), as_Address_lo(to_addr)); + break; + } + + case T_BYTE: // fall through + case T_BOOLEAN: { + __ strb(src->as_register(), as_Address(to_addr)); + break; + } + + case T_CHAR: // fall through + case T_SHORT: + __ strh(src->as_register(), as_Address(to_addr)); + break; + + default: + ShouldNotReachHere(); + } + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + assert(src->is_stack(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + if (dest->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA) { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + } else { + __ ldrw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + } + + } else if (dest->is_double_cpu()) { + Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); + __ ldr(dest->as_register_lo(), src_addr_LO); + + } else if (dest->is_single_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); + __ ldrs(dest->as_float_reg(), src_addr); + + } else if (dest->is_double_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); + __ ldrd(dest->as_double_reg(), src_addr); + + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + reloc_type = relocInfo::section_word_type; + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + reloc_type = relocInfo::metadata_type; + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + reloc_type = relocInfo::oop_type; + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + reloc_type = relocInfo::oop_type; + break; + default: ShouldNotReachHere(); + } + + __ far_call(RuntimeAddress(target)); + add_call_info_here(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + + LIR_Opr temp; + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::rscratch1_long_opr; + else + temp = FrameMap::rscratch1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + 
add_debug_info_for_null_check_here(info); + } + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: { + __ ldrs(dest->as_float_reg(), as_Address(from_addr)); + break; + } + + case T_DOUBLE: { + __ ldrd(dest->as_double_reg(), as_Address(from_addr)); + break; + } + + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ ldrw(dest->as_register(), as_Address(from_addr)); + } else { + __ ldr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + LP64_ONLY(ShouldNotReachHere()); + __ ldr(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ ldrw(dest->as_register(), as_Address(from_addr)); + } else { + __ ldr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ ldrw(dest->as_register(), as_Address(from_addr)); + break; + + case T_LONG: { + __ ldr(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + } + + case T_BYTE: + __ ldrsb(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: { + __ ldrb(dest->as_register(), as_Address(from_addr)); + break; + } + + case T_CHAR: + __ ldrh(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ ldrsh(dest->as_register(), as_Address(from_addr)); + break; + + default: + ShouldNotReachHere(); + } + + if (type == T_ARRAY || type == T_OBJECT) { +#ifdef _LP64 + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } +#endif + __ verify_oop(dest->as_register()); + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { +#ifdef _LP64 + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } +#endif + } +} + + +void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); } + + +void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); } + + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + Register Rdividend = op->in_opr1()->as_register(); + Register Rdivisor = op->in_opr2()->as_register(); + Register Rscratch = op->in_opr3()->as_register(); + Register Rresult = op->result_opr()->as_register(); + int divisor = -1; + + /* + TODO: For some reason, using the Rscratch that gets passed in is + not possible because the register allocator does not see the tmp reg + as used, and assignes it the same register as Rdividend. We use rscratch1 + instead. 
+ + assert(Rdividend != Rscratch, ""); + assert(Rdivisor != Rscratch, ""); + */ + + if (Rdivisor == noreg && is_power_of_2(divisor)) { + // convert division by a power of two into some shifts and logical operations + } + + if (op->code() == lir_irem) { + __ corrected_idivl(Rresult, Rdividend, Rdivisor, true, rscratch1); + } else if (op->code() == lir_idiv) { + __ corrected_idivl(Rresult, Rdividend, Rdivisor, false, rscratch1); + } else + ShouldNotReachHere(); +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->cond() == lir_cond_always) { + if (op->info() != NULL) add_debug_info_for_branch(op->info()); + __ b(*(op->label())); + } else { + Assembler::Condition acond; + if (op->code() == lir_cond_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + // Assembler::EQ does not permit unordered branches, so we add + // another branch here. Likewise, Assembler::NE does not permit + // ordered branches. + if ((is_unordered && op->cond() == lir_cond_equal) + || (!is_unordered && op->cond() == lir_cond_notEqual)) + __ br(Assembler::VS, *(op->ublock()->label())); + switch(op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = (is_unordered ? Assembler::LT : Assembler::LO); break; + case lir_cond_lessEqual: acond = (is_unordered ? Assembler::LE : Assembler::LS); break; + case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break; + case lir_cond_greater: acond = (is_unordered ? 
Assembler::HI : Assembler::GT); break; + default: ShouldNotReachHere(); + } + } else { + switch (op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + } + } + __ br(acond,*(op->label())); + } +} + + + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + { + __ scvtfws(dest->as_float_reg(), src->as_register()); + break; + } + case Bytecodes::_i2d: + { + __ scvtfwd(dest->as_double_reg(), src->as_register()); + break; + } + case Bytecodes::_l2d: + { + __ scvtfd(dest->as_double_reg(), src->as_register_lo()); + break; + } + case Bytecodes::_l2f: + { + __ scvtfs(dest->as_float_reg(), src->as_register_lo()); + break; + } + case Bytecodes::_f2d: + { + __ fcvts(dest->as_double_reg(), src->as_float_reg()); + break; + } + case Bytecodes::_d2f: + { + __ fcvtd(dest->as_float_reg(), src->as_double_reg()); + break; + } + case Bytecodes::_i2c: + { + __ ubfx(dest->as_register(), src->as_register(), 0, 16); + break; + } + case Bytecodes::_i2l: + { + __ sxtw(dest->as_register_lo(), src->as_register()); + break; + } + case Bytecodes::_i2s: + { + __ sxth(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_i2b: + { + __ sxtb(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_l2i: + { + _masm->block_comment("FIXME: This could be a no-op"); + __ uxtw(dest->as_register(), src->as_register_lo()); + break; + } + case Bytecodes::_d2l: + { + Register tmp = op->tmp1()->as_register(); + __ clear_fpsr(); + __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); + __ get_fpsr(tmp); + __ tst(tmp, 1); // FPSCR.IOC + __ br(Assembler::NE, *(op->stub()->entry())); + __ bind(*op->stub()->continuation()); + break; + } + case Bytecodes::_f2i: + { + Register tmp = op->tmp1()->as_register(); + __ clear_fpsr(); + __ fcvtzsw(dest->as_register(), src->as_float_reg()); + __ get_fpsr(tmp); + __ tst(tmp, 1); // FPSCR.IOC + __ br(Assembler::NE, *(op->stub()->entry())); + __ bind(*op->stub()->continuation()); + break; + } + case Bytecodes::_f2l: + { + Register tmp = op->tmp1()->as_register(); + __ clear_fpsr(); + __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); + __ get_fpsr(tmp); + __ tst(tmp, 1); // FPSCR.IOC + __ br(Assembler::NE, *(op->stub()->entry())); + __ bind(*op->stub()->continuation()); + break; + } + case Bytecodes::_d2i: + { + Register tmp = op->tmp1()->as_register(); + __ clear_fpsr(); + __ fcvtzdw(dest->as_register(), src->as_double_reg()); + __ get_fpsr(tmp); + __ tst(tmp, 1); // FPSCR.IOC + __ br(Assembler::NE, *(op->stub()->entry())); + __ bind(*op->stub()->continuation()); + break; + } + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ldrb(rscratch1, Address(op->klass()->as_register(), + InstanceKlass::init_state_offset())); + __ cmpw(rscratch1, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ br(Assembler::NE, *op->stub()->entry()); + } + __ allocate_object(op->obj()->as_register(), 
+ op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = op->len()->as_register(); + __ uxtw(len, len); + + if (UseSlowPath || + (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || + (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ mov(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), + len, + tmp1, + tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ldr(rscratch1, Address(rscratch2)); + __ cmp(recv, rscratch1); + __ br(Assembler::NE, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(rscratch2, + Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(rscratch2); + __ ldr(rscratch1, recv_addr); + __ cbnz(rscratch1, next_test); + __ str(recv, recv_addr); + __ mov(rscratch1, DataLayout::counter_increment); + __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ str(rscratch1, Address(rscratch2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure; + Label *success_target = op->should_profile() ? &profile_cast_success : success; + Label *failure_target = op->should_profile() ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded() && !UseCompressedClassPointers) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (op->should_profile()) { + Label not_null; + __ cbnz(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::DataLayout::header_offset()), + LogBytesPerWord); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ ldr(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, header_bits); + __ str(rscratch1, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ cbz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { +#ifdef _LP64 + __ mov_metadata(k_RInfo, k->constant_encoding()); +#endif // _LP64 + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(rscratch1, obj); + __ cmp( rscratch1, k_RInfo); + + __ br(Assembler::NE, *failure_target); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset()))); + __ cmp(k_RInfo, rscratch1); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ br(Assembler::NE, *failure_target); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ br(Assembler::EQ, *success_target); + // check for self + __ cmp(klass_RInfo, k_RInfo); + __ br(Assembler::EQ, *success_target); + + __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize))); + // result is a boolean + __ cbzw(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); + // result is a boolean + __ cbz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr + = __ form_address(rscratch2, mdo, + 
md->byte_offset_of_slot(data, CounterData::count_offset()), + LogBytesPerWord); + __ ldr(rscratch1, counter_addr); + __ sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, counter_addr); + __ b(*failure); + } + __ b(*success); +} + + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry(); + + if (op->should_profile()) { + Label not_null; + __ cbnz(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::header_offset()), + LogBytesPerInt); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ ldrw(rscratch1, data_addr); + __ orrw(rscratch1, rscratch1, header_bits); + __ strw(rscratch1, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ cbz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); + // result is a boolean + __ cbzw(k_RInfo, *failure_target); + // fall through to the success case + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + Label update_done; + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(rscratch2, counter_addr); + __ ldr(rscratch1, Address(rscratch2)); + __ sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, Address(rscratch2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + 
Register dst = op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ mov(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ mov(dst, zr); + __ b(done); + __ bind(success); + __ mov(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, rscratch1); + __ cset(rscratch1, Assembler::NE); + __ membar(__ AnyAny); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, rscratch1); + __ cset(rscratch1, Assembler::NE); + __ membar(__ AnyAny); +} + + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr = as_reg(op->addr()); + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + Label succeed, fail, around; + + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + Register t1 = op->tmp1()->as_register(); + assert(op->tmp1()->is_valid(), "must be"); + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + __ encode_heap_oop(rscratch2, newval); + newval = rscratch2; + casw(addr, newval, cmpval); + } else { + casl(addr, newval, cmpval); + } + } else if (op->code() == lir_cas_int) { + casw(addr, newval, cmpval); + } else { + casl(addr, newval, cmpval); + } +} + + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { + + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::EQ; ncond = Assembler::NE; break; + case lir_cond_notEqual: acond = Assembler::NE; ncond = Assembler::EQ; break; + case lir_cond_less: acond = Assembler::LT; ncond = Assembler::GE; break; + case lir_cond_lessEqual: acond = Assembler::LE; ncond = Assembler::GT; break; + case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break; + case lir_cond_greater: acond = Assembler::GT; ncond = Assembler::LE; break; + case lir_cond_belowEqual: Unimplemented(); break; + case lir_cond_aboveEqual: Unimplemented(); break; + default: ShouldNotReachHere(); + } + + assert(result->is_single_cpu() || result->is_double_cpu(), + "expect single register for result"); + if (opr1->is_constant() && opr2->is_constant() + && opr1->type() == T_INT && opr2->type() == T_INT) { + jint val1 = opr1->as_jint(); + jint val2 = opr2->as_jint(); + if (val1 == 0 && val2 == 1) { + __ cset(result->as_register(), ncond); + return; + } else if (val1 == 1 && val2 == 0) { + __ cset(result->as_register(), acond); + return; + } + } + + if (opr1->is_constant() && opr2->is_constant() + && opr1->type() == T_LONG && opr2->type() == T_LONG) { + jlong val1 = opr1->as_jlong(); + jlong val2 = opr2->as_jlong(); + if (val1 == 0 && val2 == 1) { + __ cset(result->as_register_lo(), ncond); + return; + } else if (val1 == 1 && val2 == 0) { + __ cset(result->as_register_lo(), acond); + return; + } + } + + if (opr1->is_stack()) { + stack2reg(opr1, FrameMap::rscratch1_opr, result->type()); + opr1 = FrameMap::rscratch1_opr; + } else if 
(opr1->is_constant()) { + LIR_Opr tmp + = opr1->type() == T_LONG ? FrameMap::rscratch1_long_opr : FrameMap::rscratch1_opr; + const2reg(opr1, tmp, lir_patch_none, NULL); + opr1 = tmp; + } + + if (opr2->is_stack()) { + stack2reg(opr2, FrameMap::rscratch2_opr, result->type()); + opr2 = FrameMap::rscratch2_opr; + } else if (opr2->is_constant()) { + LIR_Opr tmp + = opr2->type() == T_LONG ? FrameMap::rscratch2_long_opr : FrameMap::rscratch2_opr; + const2reg(opr2, tmp, lir_patch_none, NULL); + opr2 = tmp; + } + + if (result->type() == T_LONG) + __ csel(result->as_register_lo(), opr1->as_register_lo(), opr2->as_register_lo(), acond); + else + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, + "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ addw (dest->as_register(), lreg, rreg); break; + case lir_sub: __ subw (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mulw (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // single_cpu + double_cpu: can happen with obj+long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add(dreg, lreg, rreg); break; + case lir_sub: __ sub(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + // cpu register - constant + jlong c; + + // FIXME. This is fugly: we really need to factor all this logic. 
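+      // Widen the constant to a jlong up front so the immediate add/sub forms below can be shared across T_INT, T_LONG and T_ADDRESS operands.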
+      switch(right->type()) {
+      case T_LONG:
+        c = right->as_constant_ptr()->as_jlong();
+        break;
+      case T_INT:
+      case T_ADDRESS:
+        c = right->as_constant_ptr()->as_jint();
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch(left->type()) {
+      case T_INT:
+        switch (code) {
+        case lir_add: __ addw(dreg, lreg, c); break;
+        case lir_sub: __ subw(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+      case T_OBJECT:
+      case T_ADDRESS:
+        switch (code) {
+        case lir_add: __ add(dreg, lreg, c); break;
+        case lir_sub: __ sub(dreg, lreg, c); break;
+        default: ShouldNotReachHere();
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_double_cpu()) {
+    Register lreg_lo = left->as_register_lo();
+
+    if (right->is_double_cpu()) {
+      // cpu register - cpu register
+      Register rreg_lo = right->as_register_lo();
+      switch (code) {
+      case lir_add: __ add (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_sub: __ sub (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_mul: __ mul (dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false, rscratch1); break;
+      case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true, rscratch1); break;
+      default:
+        ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      Register dreg = as_reg(dest);
+      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+      if (c == 0 && dreg == lreg_lo) {
+        COMMENT("effective nop elided");
+        return;
+      }
+      switch (code) {
+      case lir_add: __ add(dreg, lreg_lo, c); break;
+      case lir_sub: __ sub(dreg, lreg_lo, c); break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_fpu()) {
+    assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
+    switch (code) {
+    case lir_add: __ fadds (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_sub: __ fsubs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_mul_strictfp: // fall through
+    case lir_mul: __ fmuls (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_div_strictfp: // fall through
+    case lir_div: __ fdivs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    default:
+      ShouldNotReachHere();
+    }
+  } else if (left->is_double_fpu()) {
+    if (right->is_double_fpu()) {
+      // fpu register - fpu register
+      switch (code) {
+      case lir_add: __ faddd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_sub: __ fsubd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_mul_strictfp: // fall through
+      case lir_mul: __ fmuld (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_div_strictfp: // fall through
+      case lir_div: __ fdivd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      if (right->is_constant()) {
+        ShouldNotReachHere();
+      }
+      ShouldNotReachHere();
+    }
+  } else if (left->is_single_stack() || left->is_address()) {
+    assert(left == dest, "left and dest must be equal");
+
ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); } + + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); + Register Rleft = left->is_single_cpu() ? left->as_register() : + left->as_register_lo(); + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: __ andw (Rdst, Rleft, right->as_jint()); break; + case lir_logic_or: __ orrw (Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ eorw (Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : + right->as_register_lo(); + switch (code) { + case lir_logic_and: __ andw (Rdst, Rleft, Rright); break; + case lir_logic_or: __ orrw (Rdst, Rleft, Rright); break; + case lir_logic_xor: __ eorw (Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + Register Rdst = dst->as_register_lo(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, right->as_jlong()); break; + case lir_logic_or: __ orr (Rdst, Rleft, right->as_jlong()); break; + case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? 
right->as_register() : + right->as_register_lo(); + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, Rright); break; + case lir_logic_or: __ orr (Rdst, Rleft, Rright); break; + case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } +} + + + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); } + + +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + if (opr1->is_constant() && opr2->is_single_cpu()) { + // tableswitch + Register reg = as_reg(opr2); + struct tableswitch &table = switches[opr1->as_constant_ptr()->as_jint()]; + __ tableswitch(reg, table._first_key, table._last_key, table._branches, table._after); + } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { + Register reg1 = as_reg(opr1); + if (opr2->is_single_cpu()) { + // cpu register - cpu register + Register reg2 = opr2->as_register(); + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { + __ cmp(reg1, reg2); + } else { + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); + __ cmpw(reg1, reg2); + } + return; + } + if (opr2->is_double_cpu()) { + // cpu register - cpu register + Register reg2 = opr2->as_register_lo(); + __ cmp(reg1, reg2); + return; + } + + if (opr2->is_constant()) { + bool is_32bit = false; // width of register operand + jlong imm; + + switch(opr2->type()) { + case T_INT: + imm = opr2->as_constant_ptr()->as_jint(); + is_32bit = true; + break; + case T_LONG: + imm = opr2->as_constant_ptr()->as_jlong(); + break; + case T_ADDRESS: + imm = opr2->as_constant_ptr()->as_jint(); + break; + case T_OBJECT: + case T_ARRAY: + imm = jlong(opr2->as_constant_ptr()->as_jobject()); + break; + default: + ShouldNotReachHere(); + break; + } + + if (Assembler::operand_valid_for_add_sub_immediate(imm)) { + if (is_32bit) + __ cmpw(reg1, imm); + else + __ cmp(reg1, imm); + return; + } else { + __ mov(rscratch1, imm); + if (is_32bit) + __ cmpw(reg1, rscratch1); + else + __ cmp(reg1, rscratch1); + return; + } + } else + ShouldNotReachHere(); + } else if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + __ fcmps(reg1, reg2); + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + __ fcmpd(reg1, reg2); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register()); + } else if (left->is_double_fpu()) { + __ float_cmp(false, is_unordered_less ? 
-1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register()); + } else { + ShouldNotReachHere(); + } + } else if (code == lir_cmp_l2i) { + Label done; + __ cmp(left->as_register_lo(), right->as_register_lo()); + __ mov(dst->as_register(), (u_int64_t)-1L); + __ br(Assembler::LT, done); + __ csinc(dst->as_register(), zr, zr, Assembler::EQ); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::align_call(LIR_Code code) { } + + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address call = __ ic_call(op->addr()); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + + +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + __ mov_metadata(rmethod, (Metadata*)NULL); + __ movptr(rscratch1, 0); + __ br(rscratch1); + + assert(__ offset() - start <= call_stub_size, "stub too big"); + __ end_a_stub(); +} + + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == r0, "must match"); + assert(exceptionPC->as_register() == r3, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler, the unwind code does not need it. + int pc_for_athrow_offset = __ offset(); + InternalAddress pc_for_athrow(__ pc()); + __ adr(exceptionPC->as_register(), pc_for_athrow); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ verify_not_null_oop(r0); + // search an exception handler (r0: exception oop, r3: throwing pc) + if (compilation()->has_fpu_code()) { + unwind_id = Runtime1::handle_exception_id; + } else { + unwind_id = Runtime1::handle_exception_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); + + // FIXME: enough room for two byte trap ???? + __ nop(); +} + + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == r0, "must match"); + + __ b(_unwind_handler_entry); +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dreg = dest->is_single_cpu() ? 
dest->as_register() : dest->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ lslvw (dreg, lreg, count->as_register()); break; + case lir_shr: __ asrvw (dreg, lreg, count->as_register()); break; + case lir_ushr: __ lsrvw (dreg, lreg, count->as_register()); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ lslv (dreg, lreg, count->as_register()); break; + case lir_shr: __ asrv (dreg, lreg, count->as_register()); break; + case lir_ushr: __ lsrv (dreg, lreg, count->as_register()); break; + default: + ShouldNotReachHere(); + break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ lslw (dreg, lreg, count); break; + case lir_shr: __ asrw (dreg, lreg, count); break; + case lir_ushr: __ lsrw (dreg, lreg, count); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ lsl (dreg, lreg, count); break; + case lir_shr: __ asr (dreg, lreg, count); break; + case lir_ushr: __ lsr (dreg, lreg, count); break; + default: + ShouldNotReachHere(); + break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + + +void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) { + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ str (r, Address(sp, offset_from_rsp_in_bytes)); +} + + +void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) { + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ mov (rscratch1, c); + __ str (rscratch1, Address(sp, offset_from_rsp_in_bytes)); +} + + +void LIR_Assembler::store_parameter(jobject o, int offset_from_rsp_in_words) { + ShouldNotReachHere(); + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ lea(rscratch1, __ constant_oop_address(o)); + __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes)); +} + + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + 
BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL // || basic_type == T_OBJECT + ) { + Label done; + assert(src == r1 && src_pos == r2, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + __ stp(dst, dst_pos, Address(sp, 0*BytesPerWord)); + __ stp(length, src_pos, Address(sp, 2*BytesPerWord)); + __ str(src, Address(sp, 4*BytesPerWord)); + + address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); + address copyfunc_addr = StubRoutines::generic_arraycopy(); + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mov(c_rarg0, j_rarg0); + assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mov(c_rarg1, j_rarg1); + assert_different_registers(c_rarg2, j_rarg3, j_rarg4); + __ mov(c_rarg2, j_rarg2); + assert_different_registers(c_rarg3, j_rarg4); + __ mov(c_rarg3, j_rarg3); + __ mov(c_rarg4, j_rarg4); + if (copyfunc_addr == NULL) { // Use C version if stub was not generated + __ mov(rscratch1, RuntimeAddress(C_entry)); + __ blr(rscratch1); + } else { +#ifndef PRODUCT + if (PrintC1Statistics) { + __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); + } + + __ cbz(r0, *stub->continuation()); + + // Reload values from the stack so they are where the stub + // expects them. + __ ldp(dst, dst_pos, Address(sp, 0*BytesPerWord)); + __ ldp(length, src_pos, Address(sp, 2*BytesPerWord)); + __ ldr(src, Address(sp, 4*BytesPerWord)); + + if (copyfunc_addr != NULL) { + // r0 is -1^K where K == partial copied count + __ eonw(rscratch1, r0, zr); + // adjust length down and src/end pos up by partial copied count + __ subw(length, length, rscratch1); + __ addw(src_pos, src_pos, rscratch1); + __ addw(dst_pos, dst_pos, rscratch1); + } + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + int shift_amount; + int scale = exact_log2(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ cbz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ cbz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
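+  // Array klasses encode a negative layout_helper, so a value >= Klass::_lh_neutral_value means the operand may not be an array at all and we branch to the slow stub.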
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ cmpw(rscratch1, Klass::_lh_neutral_value); + __ br(Assembler::GE, *stub->entry()); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ cmpw(rscratch1, Klass::_lh_neutral_value); + __ br(Assembler::GE, *stub->entry()); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ cmpw(src_pos, 0); + __ br(Assembler::LT, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ cmpw(dst_pos, 0); + __ br(Assembler::LT, *stub->entry()); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ cmpw(length, 0); + __ br(Assembler::LT, *stub->entry()); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ addw(tmp, src_pos, length); + __ ldrw(rscratch1, src_length_addr); + __ cmpw(tmp, rscratch1); + __ br(Assembler::HI, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ addw(tmp, dst_pos, length); + __ ldrw(rscratch1, dst_length_addr); + __ cmpw(tmp, rscratch1); + __ br(Assembler::HI, *stub->entry()); + } + + // FIXME: The logic in LIRGenerator::arraycopy_helper clears + // length_positive_check if the source of our length operand is an + // arraylength. However, that arraylength might be zero, and the + // stub that we're about to call contains an assertion that count != + // 0 . So we make this check purely in order not to trigger an + // assertion failure. + __ cbzw(length, *stub->continuation()); + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + __ ldrw(tmp, src_klass_addr); + __ ldrw(rscratch1, dst_klass_addr); + __ cmpw(tmp, rscratch1); + } else { + __ ldr(tmp, src_klass_addr); + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + } + __ br(Assembler::NE, *stub->entry()); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + +#define PUSH(r1, r2) \ + stp(r1, r2, __ pre(sp, -2 * wordSize)); + +#define POP(r1, r2) \ + ldp(r1, r2, __ post(sp, 2 * wordSize)); + + __ PUSH(src, dst); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ PUSH(src, dst); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ POP(src, dst); + + __ cbnz(src, cont); + + __ bind(slow); + __ POP(src, dst); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. 
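+          // Only one side is statically known to be an objArray; check the other side's layout_helper against the objArray pattern at runtime and bail out to the stub on a mismatch.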
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ldrw(rscratch1, klass_lh_addr); + __ mov(rscratch2, objArray_lh); + __ eorw(rscratch1, rscratch1, rscratch2); + __ cbnzw(rscratch1, *stub->entry()); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + __ stp(dst, dst_pos, Address(sp, 0*BytesPerWord)); + __ stp(length, src_pos, Address(sp, 2*BytesPerWord)); + __ str(src, Address(sp, 4*BytesPerWord)); + + __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + __ uxtw(c_rarg2, length); + assert_different_registers(c_rarg2, dst); + + __ load_klass(c_rarg4, dst); + __ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); + __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); + __ far_call(RuntimeAddress(copyfunc_addr)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ cbnz(r0, failed); + __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); + __ bind(failed); + } +#endif + + __ cbz(r0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, r0, rscratch1); + + // Restore previously spilled arguments + __ ldp(dst, dst_pos, Address(sp, 0*BytesPerWord)); + __ ldp(length, src_pos, Address(sp, 2*BytesPerWord)); + __ ldr(src, Address(sp, 4*BytesPerWord)); + + // return value is -1^K where K is partial copied count + __ eonw(rscratch1, r0, zr); + // adjust length down and src/end pos up by partial copied count + __ subw(length, length, rscratch1); + __ addw(src_pos, src_pos, rscratch1); + __ addw(dst_pos, dst_pos, rscratch1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ POP(src, dst); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
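+    // Debug-only check: compare the (possibly compressed) klass words of src and dst against the expected default_type and stop() on a mismatch.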
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); +#ifdef _LP64 + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } +#endif + + if (basic_type != T_OBJECT) { + + if (UseCompressedClassPointers) { + __ ldrw(rscratch1, dst_klass_addr); + __ cmpw(tmp, rscratch1); + } else { + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + } + __ br(Assembler::NE, halt); + if (UseCompressedClassPointers) { + __ ldrw(rscratch1, src_klass_addr); + __ cmpw(tmp, rscratch1); + } else { + __ ldr(rscratch1, src_klass_addr); + __ cmp(tmp, rscratch1); + } + __ br(Assembler::EQ, known_ok); + } else { + if (UseCompressedClassPointers) { + __ ldrw(rscratch1, dst_klass_addr); + __ cmpw(tmp, rscratch1); + } else { + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + } + __ br(Assembler::EQ, known_ok); + __ cmp(src, dst); + __ br(Assembler::EQ, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); + } +#endif + + __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + __ uxtw(c_rarg2, length); + assert_different_registers(c_rarg2, dst); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ far_call(RuntimeAddress(entry)); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + + + + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for 
calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + Bytecodes::Code bc = method->java_code_at_bci(bci); + const bool callee_is_static = callee->is_loaded() && callee->is_static(); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && + !callee_is_static && // required for optimized MH invokes + C1ProfileVirtualCalls) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(rscratch1, known_klass->constant_encoding()); + __ lea(rscratch2, recv_addr); + __ str(rscratch1, Address(rscratch2)); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
+ __ addptr(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call + __ addptr(counter_addr, DataLayout::counter_increment); + } +} + + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + unsigned long offset; + __ adrp(res, ExternalAddress(StubRoutines::crc_table_addr()), offset); + if (offset) __ add(res, res, offset); + + __ ornw(crc, zr, crc); // ~crc + __ update_byte_crc32(crc, val, res); + __ ornw(res, zr, crc); // ~crc +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != rscratch1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ mov(tmp, obj); + } + if (do_null) { + __ cbnz(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ldr(rscratch2, mdo_addr); + __ orr(rscratch2, rscratch2, TypeEntries::null_seen); + __ str(rscratch2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ cbnz(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(rscratch1, exact_klass->constant_encoding()); + __ eor(rscratch1, tmp, rscratch1); + __ cbz(rscratch1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ cbz(rscratch1, next); + + __ andr(rscratch1, tmp, TypeEntries::type_unknown); + __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore. 
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ cbz(rscratch2, none);
+          __ cmp(rscratch2, TypeEntries::null_seen);
+          __ br(Assembler::EQ, none);
+          // There is a chance that the checks above (re-reading profiling
+          // data from memory) fail if another thread has just set the
+          // profiling to this obj's klass
+          __ dmb(Assembler::ISHLD);
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(tmp, tmp, rscratch2);
+          __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+          __ cbz(rscratch1, next);
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+      }
+
+      // different than before. Cannot keep accurate profile.
+      __ ldr(rscratch2, mdo_addr);
+      __ orr(rscratch2, rscratch2, TypeEntries::type_unknown);
+      __ str(rscratch2, mdo_addr);
+
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ b(next);
+
+        __ bind(none);
+        // first time here. Set profile type.
+        __ str(tmp, mdo_addr);
+      }
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ mov_metadata(tmp, exact_klass->constant_encoding());
+        __ ldr(rscratch2, mdo_addr);
+        __ eor(tmp, tmp, rscratch2);
+        __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+        __ cbz(rscratch1, next);
+#ifdef ASSERT
+        {
+          Label ok;
+          __ ldr(rscratch1, mdo_addr);
+          __ cbz(rscratch1, ok);
+          __ cmp(rscratch1, TypeEntries::null_seen);
+          __ br(Assembler::EQ, ok);
+          // may have been set by another thread
+          __ dmb(Assembler::ISHLD);
+          __ mov_metadata(rscratch1, exact_klass->constant_encoding());
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(rscratch2, rscratch1, rscratch2);
+          __ andr(rscratch2, rscratch2, TypeEntries::type_mask);
+          __ cbz(rscratch2, ok);
+
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+        }
+#endif
+        // first time here. Set profile type.
+        __ str(tmp, mdo_addr);
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+
+        __ orr(tmp, tmp, TypeEntries::type_unknown);
+        __ str(tmp, mdo_addr);
+        // FIXME: Write barrier needed here?
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + + +void LIR_Assembler::align_backward_branch_target() { +} + + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ negw(dest->as_register(), left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + __ neg(dest->as_register_lo(), left->as_register_lo()); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ fnegs(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ fnegd(dest->as_double_reg(), left->as_double_reg()); + } +} + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { + __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); +} + + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest)); + } else { + __ mov(rscratch1, RuntimeAddress(dest)); + __ blr(rscratch1); + } + + if (info != NULL) { + add_call_info_here(info); + } + __ maybe_isb(); +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } else { + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + + Label ok; + if (op->condition() != lir_cond_always) { + Assembler::Condition acond = Assembler::AL; + switch (op->condition()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + } + __ br(acond, ok); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::LoadLoad|Assembler::LoadStore); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::LoadStore|Assembler::StoreStore); +} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ 
membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } + +void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ mov(result_reg->as_register(), rthread); +} + + +void LIR_Assembler::peephole(LIR_List *lir) { +#if 0 + if (tableswitch_count >= max_tableswitches) + return; + + /* + This finite-state automaton recognizes sequences of compare-and- + branch instructions. We will turn them into a tableswitch. You + could argue that C1 really shouldn't be doing this sort of + optimization, but without it the code is really horrible. + */ + + enum { start_s, cmp1_s, beq_s, cmp_s } state; + int first_key, last_key = -2147483648; + int next_key = 0; + int start_insn = -1; + int last_insn = -1; + Register reg = noreg; + LIR_Opr reg_opr; + state = start_s; + + LIR_OpList* inst = lir->instructions_list(); + for (int i = 0; i < inst->length(); i++) { + LIR_Op* op = inst->at(i); + switch (state) { + case start_s: + first_key = -1; + start_insn = i; + switch (op->code()) { + case lir_cmp: + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr2->is_constant() + && opr2->type() == T_INT) { + reg_opr = opr1; + reg = opr1->as_register(); + first_key = opr2->as_constant_ptr()->as_jint(); + next_key = first_key + 1; + state = cmp_s; + goto next_state; + } + break; + } + break; + case cmp_s: + switch (op->code()) { + case lir_branch: + if (op->as_OpBranch()->cond() == lir_cond_equal) { + state = beq_s; + last_insn = i; + goto next_state; + } + } + state = start_s; + break; + case beq_s: + switch (op->code()) { + case lir_cmp: { + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr1->as_register() == reg + && opr2->is_constant() + && opr2->type() == T_INT + && opr2->as_constant_ptr()->as_jint() == next_key) { + last_key = next_key; + next_key++; + state = cmp_s; + goto next_state; + } + } + } + last_key = next_key; + state = start_s; + break; + default: + assert(false, "impossible state"); + } + if (state == start_s) { + if (first_key < last_key - 5L && reg != noreg) { + { + // printf("found run register %d starting at insn %d low value %d high value %d\n", + // reg->encoding(), + // start_insn, first_key, last_key); + // for (int i = 0; i < inst->length(); i++) { + // inst->at(i)->print(); + // tty->print("\n"); + // } + // tty->print("\n"); + } + + struct tableswitch *sw = &switches[tableswitch_count]; + sw->_insn_index = start_insn, sw->_first_key = first_key, + sw->_last_key = last_key, sw->_reg = reg; + inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after)); + { + // Insert the new table of branches + int offset = last_insn; + for (int n = first_key; n < last_key; n++) { + inst->insert_before + (last_insn + 1, + new LIR_OpBranch(lir_cond_always, T_ILLEGAL, + inst->at(offset)->as_OpBranch()->label())); + offset -= 2, i++; + } + } + // Delete all the old compare-and-branch instructions + for (int n = first_key; n < last_key; n++) { + inst->remove_at(start_insn); + inst->remove_at(start_insn); + } + // Insert the tableswitch instruction + inst->insert_before(start_insn, + new LIR_Op2(lir_cmp, lir_cond_always, + LIR_OprFact::intConst(tableswitch_count), + reg_opr)); + inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches)); + tableswitch_count++; + } + 
reg = noreg; + last_key = -2147483648; + } + next_state: + ; + } +#endif +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); + bool is_oop = type == T_OBJECT || type == T_ARRAY; + + void (MacroAssembler::* add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::* xchg)(Register prev, Register newv, Register addr); + + switch(type) { + case T_INT: + xchg = &MacroAssembler::atomic_xchgalw; + add = &MacroAssembler::atomic_addalw; + break; + case T_LONG: + xchg = &MacroAssembler::atomic_xchgal; + add = &MacroAssembler::atomic_addal; + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + xchg = &MacroAssembler::atomic_xchgalw; + add = &MacroAssembler::atomic_addalw; + } else { + xchg = &MacroAssembler::atomic_xchgal; + add = &MacroAssembler::atomic_addal; + } + break; + default: + ShouldNotReachHere(); + xchg = &MacroAssembler::atomic_xchgal; + add = &MacroAssembler::atomic_addal; // unreachable + } + + switch (code) { + case lir_xadd: + { + RegisterOrConstant inc; + Register tmp = as_reg(tmp_op); + Register dst = as_reg(dest); + if (data->is_constant()) { + inc = RegisterOrConstant(as_long(data)); + assert_different_registers(dst, addr.base(), tmp, + rscratch1, rscratch2); + } else { + inc = RegisterOrConstant(as_reg(data)); + assert_different_registers(inc.as_register(), dst, addr.base(), tmp, + rscratch1, rscratch2); + } + __ lea(tmp, addr); + (_masm->*add)(dst, inc, tmp); + break; + } + case lir_xchg: + { + Register tmp = tmp_op->as_register(); + Register obj = as_reg(data); + Register dst = as_reg(dest); + if (is_oop && UseCompressedOops) { + __ encode_heap_oop(rscratch2, obj); + obj = rscratch2; + } + assert_different_registers(obj, addr.base(), tmp, rscratch1, dst); + __ lea(tmp, addr); + (_masm->*xchg)(dst, obj, tmp); + if (is_oop && UseCompressedOops) { + __ decode_heap_oop(dst); + } + } + break; + default: + ShouldNotReachHere(); + } + __ membar(__ AnyAny); +} + +#undef __ --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp 2021-01-25 19:31:32.066427389 +0000 @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP +#define CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + + private: + + int array_element_size(BasicType type) const; + + void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack); + + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + + address int_constant(jlong n); + + bool is_literal_address(LIR_Address* addr); + + // When we need to use something other than rscratch1 use this + // method. + Address as_Address(LIR_Address* addr, Register tmp); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + static const int max_tableswitches = 20; + struct tableswitch switches[max_tableswitches]; + int tableswitch_count; + + void init() { tableswitch_count = 0; } + + void deoptimize_trap(CodeEmitInfo *info); + +public: + + void store_parameter(Register r, int offset_from_esp_in_words); + void store_parameter(jint c, int offset_from_esp_in_words); + void store_parameter(jobject c, int offset_from_esp_in_words); + +enum { call_stub_size = 12 * NativeInstruction::instruction_size, + exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + deopt_handler_size = 7 * NativeInstruction::instruction_size }; + + +#endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp 2021-01-25 19:31:32.473431666 +0000 @@ -0,0 +1,1444 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_aarch64.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r3_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::r0_opr; break; + case objectTag: opr = FrameMap::r0_oop_opr; break; + case longTag: opr = FrameMap::long0_opr; break; + case floatTag: opr = FrameMap::fpu0_float_opr; break; + case doubleTag: opr = FrameMap::fpu0_double_opr; break; + + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + + +//--------- loading items into registers -------------------------------- + + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + // FIXME: Just a guess + if (v->type()->as_IntConstant() != NULL) { + return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value()); + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } + + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} 
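// ---------------------------------------------------------------------------
// [Editor's aside -- illustrative sketch only, not part of the patch itself]
// The can_inline_as_constant() overloads above and load_immediate() further
// down rely on Assembler::operand_valid_for_add_sub_immediate() and
// operand_valid_for_logical_immediate() to decide whether a constant can be
// encoded directly in an AArch64 instruction or has to be materialized into
// a register first.  As a rough, self-contained illustration of the ADD/SUB
// immediate rule (a 12-bit unsigned value, optionally shifted left by 12),
// a standalone check could look like the sketch below; the function name is
// hypothetical and the real HotSpot predicate may accept additional forms.

#include <cstdint>

// True if the magnitude of imm fits an AArch64 ADD/SUB immediate:
// either an unshifted imm12 or an imm12 shifted left by 12 bits.
static bool fits_add_sub_immediate(int64_t imm) {
  uint64_t v = (imm < 0) ? -static_cast<uint64_t>(imm) : static_cast<uint64_t>(imm);
  return (v & ~UINT64_C(0xfff)) == 0 ||          // imm12
         (v & ~(UINT64_C(0xfff) << 12)) == 0;    // imm12, LSL #12
}
// [End editor's aside]
// ---------------------------------------------------------------------------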
+
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  // accumulate fixed displacements
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr;
+  }
+
+  if (index->is_register()) {
+    // apply the shift and accumulate the displacement
+    if (shift > 0) {
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    if (disp != 0) {
+      LIR_Opr tmp = new_pointer_register();
+      if (Assembler::operand_valid_for_add_sub_immediate(disp)) {
+        __ add(tmp, index, LIR_OprFact::intptrConst(disp));
+        index = tmp;
+      } else {
+        __ move(tmp, LIR_OprFact::intptrConst(disp));
+        __ add(tmp, index, tmp);
+        index = tmp;
+      }
+      disp = 0;
+    }
+  } else if (disp != 0 && !Address::offset_ok_for_immed(disp, shift)) {
+    // index is illegal so replace it with the displacement loaded into a register
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(disp), index);
+    disp = 0;
+  }
+
+  // at this point we either have base + index or base + displacement
+  if (disp == 0) {
+    return new LIR_Address(base, index, type);
+  } else {
+    assert(Address::offset_ok_for_immed(disp, 0), "must be");
+    return new LIR_Address(base, disp, type);
+  }
+}
+
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Address* addr;
+  if (index_opr->is_constant()) {
+    addr = new LIR_Address(array_opr,
+                           offset_in_bytes + index_opr->as_jint() * elem_size, type);
+  } else {
+// #ifdef _LP64
+//     if (index_opr->type() == T_INT) {
+//       LIR_Opr tmp = new_register(T_LONG);
+//       __ convert(Bytecodes::_i2l, index_opr, tmp);
+//       index_opr = tmp;
+//     }
+// #endif
+    if (offset_in_bytes) {
+      LIR_Opr tmp = new_pointer_register();
+      __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp);
+      array_opr = tmp;
+      offset_in_bytes = 0;
+    }
+    addr = new LIR_Address(array_opr,
+                           index_opr,
+                           LIR_Address::scale(type),
+                           offset_in_bytes, type);
+  }
+  if (needs_card_mark) {
+    // This store will need a precise card mark, so go ahead and
+    // compute the full address instead of computing once for the
+    // store and again for the card mark.
+    LIR_Opr tmp = new_pointer_register();
+    __ leal(LIR_OprFact::address(addr), tmp);
+    return new LIR_Address(tmp, type);
+  } else {
+    return addr;
+  }
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr r;
+  if (type == T_LONG) {
+    r = LIR_OprFact::longConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(false, x)) {
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else if (type == T_INT) {
+    r = LIR_OprFact::intConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(true, x)) {
+      // This is all rather nasty.  We don't know whether our constant
+      // is required for a logical or an arithmetic operation, so we
+      // don't know what the range of valid values is!!
+ LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else { + ShouldNotReachHere(); + } + return r; +} + + + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr imm = NULL; + switch(addr->type()) { + case T_INT: + imm = LIR_OprFact::intConst(step); + break; + case T_LONG: + imm = LIR_OprFact::longConst(step); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr reg = new_register(addr->type()); + __ load(addr, reg); + __ add(reg, imm, reg); + __ store(reg, addr); +} + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); + __ cmp(condition, reg, LIR_OprFact::intConst(c)); +} + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); + __ cmp(condition, reg, reg1); +} + + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } else { + return false; + } +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + + } + if (needs_store_check || x->check_boolean()) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // the CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different + CodeEmitInfo* range_check_info = state_for(x); + CodeEmitInfo* null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // emit array address setup early so it schedules better + // FIXME? 
No harm in this on aarch64, and it might help + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp(lir_cond_belowEqual, length.result(), index.result()); + __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); + } + + if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(value.result(), array_addr, null_check_info); + // Seems to be a precise + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } else { + LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); + __ move(result, array_addr, null_check_info); + } +} + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + + +void LIRGenerator::do_NegateOp(NegateOp* x) { + + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); + +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + // float remainder is implemented as a direct call into the runtime + LIRItem right(x->x(), this); + LIRItem left(x->y(), this); + + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + right.load_item(); + + __ 
move(right.result(), cc->at(0)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + return; + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. + right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { + tmp = new_register(T_DOUBLE); + } + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + + // the check for division by zero destroys the right operand + right.set_destroys_register(); + + // check for division by zero (destroys registers of right operand!) + CodeEmitInfo* info = state_for(x); + + left.load_item(); + right.load_item(); + + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem (left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div (left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + break; + } + + + } else { + assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (! right.is_register()) { + if (x->op() == Bytecodes::_lmul + || ! right.is_constant() + || ! 
Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) { + right.load_item(); + } else { // add, sub + assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + + right_arg->load_item(); + rlock_result(x); + + CodeEmitInfo* info = state_for(x); + LIR_Opr tmp = new_register(T_INT); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + info = state_for(x); + + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } + + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() + && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + int c = right.get_jint_constant(); + if (! is_power_of_2(c) && ! is_power_of_2(c + 1) && ! is_power_of_2(c - 1)) { + // Cannot use constant op. 
+ right.load_item(); + } else { + right.dont_load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + + switch (x->op()) { + case Bytecodes::_ishl: { + int c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_ishr: { + int c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_iushr: { + int c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshl: { + int c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshr: { + int c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lushr: { + int c = right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = new_register(T_INT); + switch (x->op()) { + case Bytecodes::_ishl: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + } + case Bytecodes::_ishr: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + } + case Bytecodes::_iushr: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + } + case Bytecodes::_lshl: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + } + case Bytecodes::_lshr: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + } + case Bytecodes::_lushr: { + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant())) + || (right.type()->tag() == 
longTag + && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant())))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + if (tag == longTag) { + left.set_destroys_register(); + } + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // offset of field + LIRItem cmp (x->argument_at(2), this); // value to compare with field + LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp + + assert(obj.type()->tag() == objectTag, "invalid type"); + + // In 64bit the type can be long, sparc doesn't have this assert + // assert(offset.type()->tag() == intTag, "invalid type"); + + assert(cmp.type()->tag() == type->tag(), "invalid type"); + assert(val.type()->tag() == type->tag(), "invalid type"); + + // get address of field + obj.load_item(); + offset.load_nonconstant(); + val.load_item(); + cmp.load_item(); + + LIR_Address* a; + if(offset.result()->is_constant()) { + jlong c = offset.result()->as_jlong(); + if ((jlong)((jint)c) == c) { + a = new LIR_Address(obj.result(), + (jint)c, + as_BasicType(type)); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(offset.result(), tmp); + a = new LIR_Address(obj.result(), + tmp, + as_BasicType(type)); + } + } else { + a = new LIR_Address(obj.result(), + offset.result(), + LIR_Address::times_1, + 0, + as_BasicType(type)); + } + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + + LIR_Opr result = rlock_result(x); + + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + if (type == objectType) + __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT), + result); + else if (type == intType) + __ cas_int(addr, cmp.result(), val.result(), ill, ill); + else if (type == longType) + __ cas_long(addr, cmp.result(), val.result(), ill, ill); + else { + ShouldNotReachHere(); + } + + __ logical_xor(FrameMap::r8_opr, LIR_OprFact::intConst(1), result); + + if (type == objectType) { // Write-barrier needed for Object fields. 
+ // Seems to be precise + post_barrier(addr, val.result()); + } +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + } + break; + } + case vmIntrinsics::_dlog10: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { + assert(x->number_of_arguments() == 1, "wrong type"); + + address runtime_entry = NULL; + switch (x->id()) { + case vmIntrinsics::_dsin: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case vmIntrinsics::_dtan: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + } +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, + index, + LIR_Address::times_1, + offset, + T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + bool needs_stub; + + switch (x->op()) { + case Bytecodes::_i2l: + case Bytecodes::_l2i: + case Bytecodes::_i2b: + case Bytecodes::_i2c: + case Bytecodes::_i2s: + case Bytecodes::_f2d: + case Bytecodes::_d2f: + case Bytecodes::_i2f: + case Bytecodes::_i2d: + case Bytecodes::_l2f: + case Bytecodes::_l2d: needs_stub = false; + break; + case Bytecodes::_f2l: + case Bytecodes::_d2l: + case Bytecodes::_f2i: + case Bytecodes::_d2i: needs_stub = true; + break; + 
default: ShouldNotReachHere(); + } + + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + ConversionStub* stub = NULL; + + if (needs_stub) { + stub = new ConversionStub(x->op(), conv_input, conv_result); + } + + __ convert(x->op(), conv_input, conv_result, stub, new_register(T_INT)); + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::r19_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + + length.load_item_force(FrameMap::r19_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(dims->length(), NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. 
+ CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::r0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::r19_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::r2_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + + if (tag == longTag) { + if (yin->is_constant() + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else if (tag == intTag) { + if (yin->is_constant() + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant())) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + __ cmp(lir_cond(cond), left, right); + // Generate branch profiling. Profiling code doesn't kill flags. 
+ profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(rthread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LD;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and C1 compiles the loads in another. + if (! UseBarriersForVolatile) { + __ membar(); + } + + __ volatile_load_mem_reg(address, result, info); +} + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + __ load(addr, dst); +} + + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + off.load_nonconstant(); + + // We can cope with a constant increment in an xadd + if (! (x->is_add() + && value.is_constant() + && can_inline_as_constant(x->value()))) { + value.load_item(); + } + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + if (data == dst) { + LIR_Opr tmp = new_register(data->type()); + __ move(data, tmp); + data = tmp; + } + + LIR_Address* addr; + if (offset->is_constant()) { + jlong l = offset->as_jlong(); + assert((jlong)((jint)l) == l, "offset too large for constant"); + jint c = (jint)l; + addr = new LIR_Address(src.result(), c, type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + LIR_Opr tmp = new_register(T_INT); + LIR_Opr ptr = LIR_OprFact::illegalOpr; + + if (x->is_add()) { + __ xadd(LIR_OprFact::address(addr), data, dst, tmp); + } else { + if (is_obj) { + // Do the pre-write barrier, if any. 
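+      // the pre/post write barriers need the resolved field address, so
+      // materialize base + offset into a fresh pointer register first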
+ ptr = new_pointer_register(); + __ add(src.result(), off.result(), ptr); + pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, tmp); + if (is_obj) { + post_barrier(ptr, data); + } + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp 2021-01-25 19:31:32.901436165 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on AArch64 +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp 2021-01-25 19:31:33.333440705 +0000 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_C1_LINEARSCAN_HPP +#define CPU_AARCH64_VM_C1_LINEARSCAN_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + return 1; +} + + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + if (assigned_reg < pd_first_callee_saved_reg) + return true; + if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) + return true; + if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) + return true; + return false; +} + + +inline void LinearScan::pd_add_temps(LIR_Op* op) { + // FIXME ?? +} + + +// Implementation of LinearScanWalker + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + + +#endif // CPU_AARCH64_VM_C1_LINEARSCAN_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp 2021-01-25 19:31:33.778445383 +0000 @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc_interface/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result) +{ + Label done; + if (is_float) { + fcmps(f0, f1); + } else { + fcmpd(f0, f1); + } + if (unordered_result < 0) { + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. + cset(result, NE); // Not equal or unordered + cneg(result, result, LT); // Less than or unordered + } else { + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + cset(result, NE); // Not equal or unordered + cneg(result, result, LO); // Less than + } +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done, fail; + int null_check_offset = -1; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ldr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orr(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + str(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(rscratch2, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + mov(rscratch1, sp); + sub(hdr, hdr, rscratch1); + ands(hdr, hdr, aligned_mask - os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + str(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + cbnz(hdr, slow_case); + // done + bind(done); + if (PrintBiasedLockingStatistics) { + lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + addmw(Address(rscratch2, 0), 1, rscratch1); + } + return null_check_offset; +} + + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ldr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + cbz(hdr, done); + if (!UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); +} + + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + incr_allocated_bytes(noreg, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + 
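+    // with biased locking the initial mark word is the klass' prototype
+    // header, which carries the bias pattern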
ldr(t1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fit in an int32_t + mov(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(t1, klass); + strw(t1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, zr); + } +} + +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize +void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) { + assert_different_registers(addr, len, t1, rscratch1, rscratch2); + +#ifdef ASSERT + { Label L; + tst(len, BytesPerWord - 1); + br(Assembler::EQ, L); + stop("len is not a multiple of BytesPerWord"); + bind(L); + } +#endif + +#ifndef PRODUCT + block_comment("zero memory"); +#endif + + Label loop; + Label entry; + +// Algorithm: +// +// scratch1 = cnt & 7; +// cnt -= scratch1; +// p += scratch1; +// switch (scratch1) { +// do { +// cnt -= 8; +// p[-8] = 0; +// case 7: +// p[-7] = 0; +// case 6: +// p[-6] = 0; +// // ... +// case 1: +// p[-1] = 0; +// case 0: +// p += 8; +// } while (cnt); +// } + + const int unroll = 8; // Number of str(zr) instructions we'll unroll + + lsr(len, len, LogBytesPerWord); + andr(rscratch1, len, unroll - 1); // tmp1 = cnt % unroll + sub(len, len, rscratch1); // cnt -= unroll + // t1 always points to the end of the region we're about to zero + add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord); + adr(rscratch2, entry); + sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); + br(rscratch2); + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) + str(zr, Address(t1, i * wordSize)); + bind(entry); + add(t1, t1, unroll * wordSize); + cbnz(len, loop); +} + +// preserves obj, destroys len_in_bytes +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { + Label done; + assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different"); + assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord"); + Register index = len_in_bytes; + // index is positive and ptr sized + subs(index, index, hdr_size_in_bytes); + br(Assembler::EQ, done); + // note: for the remaining code to work, index must be a multiple of BytesPerWord +#ifdef ASSERT + { Label L; + tst(index, BytesPerWord - 1); + br(Assembler::EQ, L); + stop("index is not a multiple of BytesPerWord"); + bind(L); + } +#endif + + // Preserve obj + if (hdr_size_in_bytes) + add(obj, obj, hdr_size_in_bytes); + zero_memory(obj, index, t1); + if (hdr_size_in_bytes) + sub(obj, obj, hdr_size_in_bytes); + + // done + bind(done); +} + + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); // XXX really? 
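+  // header_size and object_size are in words; they are scaled to bytes
+  // for try_allocate and initialize_object below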
+ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2); +} + +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + // clear rest of allocated space + const Register index = t2; + const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) + if (var_size_in_bytes != noreg) { + mov(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1); + } else if (con_size_in_bytes <= threshold) { + // use explicit null stores + int i = hdr_size_in_bytes; + if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { + str(zr, Address(obj, i)); + i += BytesPerWord; + } + for (; i < con_size_in_bytes; i += 2 * BytesPerWord) + stp(zr, zr, Address(obj, i)); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + block_comment("zero memory"); + // use loop to null out the fields + + int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; + mov(index, words / 8); + + const int unroll = 8; // Number of str(zr) instructions we'll unroll + int remainder = words % unroll; + lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); + + Label entry_point, loop; + b(entry_point); + + bind(loop); + sub(index, index, 1); + for (int i = -unroll; i < 0; i++) { + if (-i == remainder) + bind(entry_point); + str(zr, Address(rscratch1, i * wordSize)); + } + if (remainder == 0) + bind(entry_point); + add(rscratch1, rscratch1, unroll * wordSize); + cbnz(index, loop); + + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + mov(rscratch1, (int32_t)max_array_allocation_length); + cmp(len, rscratch1); + br(Assembler::HS, slow_case); + + const Register arr_size = t2; // okay to be the same + // align object end + mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + add(arr_size, arr_size, len, ext::uxtw, f); + andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + const Register len_zero = len; + initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { + verify_oop(receiver); + // explicit NULL check not 
needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + + cmp_klass(receiver, iCache, rscratch1); +} + + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a B, BL, NOP, BKPT, + // SVC, HVC, or SMC. Make it a NOP. + nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before doing an enter(). + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize + 2 * wordSize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize + 2 * wordSize); +} + + +void C1_MacroAssembler::verified_entry() { +} + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(sp, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + cbnz(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) { +#ifdef ASSERT + static int nn; + if (inv_r0) mov(r0, 0xDEAD); + if (inv_r19) mov(r19, 0xDEAD); + if (inv_r2) mov(r2, nn++); + if (inv_r3) mov(r3, 0xDEAD); + if (inv_r4) mov(r4, 0xDEAD); + if (inv_r5) mov(r5, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp 2021-01-25 19:31:34.195449766 +0000 @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP +#define CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + +void zero_memory(Register addr, Register len, Register t1); + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result); + + // locking + // hdr : must be r0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2 // temp register + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) 
PRODUCT_RETURN; + +#endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp 2021-01-25 19:31:34.654454590 +0000 @@ -0,0 +1,1455 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_aarch64.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_aarch64.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different"); + assert(oop_result1 != rthread && metadata_result != rthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + mov(c_rarg0, rthread); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(sp, rfp, retaddr, rscratch1); + + // do the call + lea(rscratch1, RuntimeAddress(entry)); + blr(rscratch1); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + push(r0, sp); + { Label L; + get_thread(r0); + cmp(rthread, r0); + br(Assembler::EQ, L); + stop("StubAssembler::call_RT: rthread not callee saved?"); + bind(L); + } + pop(r0, sp); +#endif + reset_last_Java_frame(true); + maybe_isb(); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + cbz(rscratch1, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + str(zr, Address(rthread, JavaThread::vm_result_offset())); + } + if 
(metadata_result->is_valid()) {
+      str(zr, Address(rthread, JavaThread::vm_result_2_offset()));
+    }
+    if (frame_size() == no_frame_size) {
+      leave();
+      far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here();
+    } else {
+      far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+    }
+    bind(L);
+  }
+  // get oop results if there are any and reset the values in the thread
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1, rthread);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result, rthread);
+  }
+  return call_offset;
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
+  mov(c_rarg1, arg1);
+  return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
+  if (c_rarg1 == arg2) {
+    if (c_rarg2 == arg1) {
+      mov(rscratch1, arg1);
+      mov(arg1, arg2);
+      mov(arg2, rscratch1);
+    } else {
+      mov(c_rarg2, arg2);
+      mov(c_rarg1, arg1);
+    }
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  // if there is any conflict use the stack
+  if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
+      arg2 == c_rarg1 || arg2 == c_rarg3 ||
+      arg3 == c_rarg1 || arg3 == c_rarg2) {
+    stp(arg3, arg2, Address(pre(sp, -2 * wordSize)));
+    stp(arg1, zr, Address(pre(sp, -2 * wordSize)));
+    ldp(c_rarg1, zr, Address(post(sp, 2 * wordSize)));
+    ldp(c_rarg3, c_rarg2, Address(post(sp, 2 * wordSize)));
+  } else {
+    mov(c_rarg1, arg1);
+    mov(c_rarg2, arg2);
+    mov(c_rarg3, arg3);
+  }
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {
+ private:
+  StubAssembler* _sasm;
+
+ public:
+  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
+  void load_argument(int offset_in_words, Register reg);
+
+  ~StubFrame();
+};
+
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
+  _sasm = sasm;
+  __ set_info(name, must_gc_arguments);
+  __ enter();
+}
+
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+  // rfp + 0: link
+  //     + 1: return address
+  //     + 2: argument with offset 0
+  //     + 3: argument with offset 1
+  //     + 4: ...
+
+  __ ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord));
+}
+
+
+StubFrame::~StubFrame() {
+  __ leave();
+  __ ret(lr);
+}
+
+#undef __
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
+
+// Stack layout for saving/restoring all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note that users of this frame may well have arguments to some runtime
+// while these values are on the stack.  These positions neglect those arguments
+// but the code in save_live_registers will take the argument count into
+// account.
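+// The save-area offsets used by generate_oop_map() below are filled in by
+// Runtime1::initialize_pd().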
+//
+
+enum reg_save_layout {
+  reg_save_frame_size = 32 /* float */ + 32 /* integer */
+};
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to be smart about FP registers.  In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them.  The
+// deopt blob is the only thing which needs to describe FPU registers.
+// In all other cases it should be sufficient to simply save their
+// current value.
+
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];
+static int reg_save_size_in_words;
+static int frame_size_in_bytes = -1;
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
+  int frame_size_in_bytes = reg_save_frame_size * BytesPerWord;
+  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
+  int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
+      int sp_offset = cpu_reg_save_offsets[i];
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                r->as_VMReg());
+    }
+  }
+
+  if (save_fpu_registers) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      {
+        int sp_offset = fpu_reg_save_offsets[i];
+        oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                  r->as_VMReg());
+      }
+    }
+  }
+  return oop_map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm,
+                                   bool save_fpu_registers = true) {
+  __ block_comment("save_live_registers");
+
+  __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+
+  if (save_fpu_registers) {
+    for (int i = 30; i >= 0; i -= 2)
+      __ stpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ pre(sp, -2 * wordSize)));
+  } else {
+    __ add(sp, sp, -32 * wordSize);
+  }
+
+  return generate_oop_map(sasm, save_fpu_registers);
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ pop(RegSet::range(r0, r29), sp);
+}
+
+static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) {
+
+  if (restore_fpu_registers) {
+    for (int i = 0; i < 32; i += 2)
+      __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+              Address(__ post(sp, 2 * wordSize)));
+  } else {
+    __ add(sp, sp, 32 * wordSize);
+  }
+
+  __ ldp(zr, r1, Address(__ post(sp, 16)));
+  __ pop(RegSet::range(r2, r29), sp);
+}
+
+
+
+void Runtime1::initialize_pd() {
+  int i;
+  int sp_offset = 0;
+
+  // all float registers are saved explicitly
+  assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here");
+  for (i = 0; i < FrameMap::nof_fpu_regs; i++) {
+    fpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in 4-byte slots; a 64-bit register takes two
+  }
+
+  for (i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    cpu_reg_save_offsets[i] = sp_offset;
+    sp_offset += 2;   // SP offsets are in 4-byte slots; a 64-bit register takes two
+  }
+}
+
+
+// target: the entry point of the method that creates and posts the exception oop
+// has_argument: true if the exception needs an argument (passed in rscratch1)
+
+OopMapSet*
Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + call_offset = __ call_RT(noreg, noreg, target, rscratch1); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + __ should_not_reach_here(); + return oop_maps; +} + + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = r0; + const Register exception_pc = r3; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + + // load and clear pending exception oop into r0 + __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset())); + __ str(zr, Address(rthread, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into r3 + __ ldr(exception_pc, Address(rfp, 1*BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ str(zr, Address(rthread, JavaThread::vm_result_offset())); + __ str(zr, Address(rthread, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (r0) and + // exception pc (lr) are dead. + const int frame_size = 2 /*fp, return address*/; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: + __ should_not_reach_here(); + break; + } + + // verify that only r0 and r3 are valid at this time + __ invalidate_registers(false, true, true, false, true, true); + // verify that r0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset())); + __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ str(exception_pc, Address(rfp, 1*BytesPerWord)); + + // compute the exception handler. 
+ // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // r0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. + + // only r0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ str(r0, Address(rfp, 1*BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP + // since we do a leave anyway. + + // Pop the return address since we are possibly changing SP (restoring from BP). + __ leave(); + + // Restore SP from FP if the exception PC is a method handle call site. + { + Label nope; + __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset())); + __ cbzw(rscratch1, nope); + __ mov(sp, rfp); + __ bind(nope); + } + + __ ret(lr); // jump to exception handler + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = r0; + // callee-saved copy of exception_oop during runtime call + const Register exception_oop_callee_saved = r19; + // other registers used in this stub + const Register exception_pc = r3; + const Register handler_addr = r1; + + // verify that only r0, is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. We also + // save exception_oop + __ stp(lr, exception_oop, Address(__ pre(sp, -2 * wordSize))); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr); + // r0: exception handler address of the caller + + // Only R0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ mov(handler_addr, r0); + + // get throwing pc (= return address). 
+ // lr has been destroyed by the call + __ ldp(lr, exception_oop, Address(__ post(sp, 2 * wordSize))); + __ mov(r3, lr); + + __ verify_not_null_oop(exception_oop); + + { + Label foo; + __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset())); + __ cbzw(rscratch1, foo); + __ mov(sp, rfp); + __ bind(foo); + } + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // r0: exception oop + // r3: throwing pc + // r1: exception handler + __ br(handler_addr); +} + + + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ mov(c_rarg0, rthread); + Label retaddr; + __ set_last_Java_frame(sp, rfp, retaddr, rscratch1); + // do the call + __ lea(rscratch1, RuntimeAddress(target)); + __ blr(rscratch1); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ br(Assembler::EQ, L); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + __ reset_last_Java_frame(true); + __ maybe_isb(); + + // check for pending exceptions + { Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // exception pending => remove activation and forward to exception handler + + { Label L1; + __ cbnz(r0, L1); // have we deoptimized? + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ bind(L1); + } + + // the deopt blob expects exceptions in the special fields of + // JavaThread, so copy and clear pending exception. + + // load and clear pending exception + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ str(zr, Address(rthread, Thread::pending_exception_offset())); + + // check that there is really a valid exception + __ verify_not_null_oop(r0); + + // load throwing pc: this is the return address of the stub + __ mov(r3, lr); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // store exception oop and throwing pc to JavaThread + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + + restore_live_registers(sasm); + + __ leave(); + + // Forward the exception directly to deopt blob. We can blow no + // registers and must leave throwing pc on the stack. 
A patch may + // have values live in registers so the entry point with the + // exception in tls. + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); + + __ bind(L); + } + + + // Runtime will return true if the nmethod has been deoptimized during + // the patching process. In that case we must do a deopt reexecute instead. + + Label reexecuteEntry, cont; + + __ cbz(r0, cont); // have we deoptimized? + + // Will reexecute. Proper return address is already on the stack we just restore + // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + + __ bind(cont); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + + return oop_maps; +} + + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + const Register exception_oop = r0; + const Register exception_pc = r3; + + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ ret(lr); + } + break; + + case throw_div0_exception_id: + { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + UseTLAB && FastTLABRefill) { + Label slow_path; + Register obj_size = r2; + Register t1 = r19; + Register t2 = r4; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ stp(r5, r19, Address(__ pre(sp, -2 * wordSize))); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset())); + __ cmpw(rscratch1, InstanceKlass::fully_initialized); + __ br(Assembler::NE, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset())); + __ cmp(obj_size, 0u); + __ br(Assembler::LE, not_ok); // make sure it's an instance (LH > 0) + __ tstw(obj_size, Klass::_lh_instance_slow_path_bit); + __ br(Assembler::EQ, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // if we got here then the TLAB allocation failed, so try + // refilling the TLAB or 
allocating directly from eden. + Label retry_tlab, try_eden; + __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy r3 (klass), returns r5 + + __ bind(retry_tlab); + + // get the instance size (size is postive so movl is fine for 64bit) + __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ verify_oop(obj); + __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize))); + __ ret(lr); + + __ bind(try_eden); + // get the instance size (size is postive so movl is fine for 64bit) + __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + __ incr_allocated_bytes(rthread, obj_size, 0, rscratch1); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ verify_oop(obj); + __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize))); + __ ret(lr); + + __ bind(slow_path); + __ ldp(r5, r19, Address(__ post(sp, 2 * wordSize))); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = r0, method = r1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ldrw(bci, Address(rfp, 2*BytesPerWord)); + // And a pointer to the Method* + __ ldr(method, Address(rfp, 3*BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = r19; // Incoming + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); + __ asrw(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ mov(rscratch1, tag); + __ cmpw(t0, rscratch1); + __ br(Assembler::EQ, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + if (UseTLAB && FastTLABRefill) { + Register arr_size = r4; + Register t1 = r2; + Register t2 = r5; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length); + __ cmpw(length, rscratch1); + __ br(Assembler::HI, slow_path); + + // if we got here then the TLAB allocation failed, so try + // refilling the TLAB or allocating directly from eden. 
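+        // tlab_refill either refills the TLAB and retries the allocation
+        // there (retry_tlab), falls back to a direct eden allocation
+        // (try_eden), or gives up and takes slow_path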
+ Label retry_tlab, try_eden; + const Register thread = + __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves r19 & r3, returns rthread + + __ bind(retry_tlab); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ldrw(t1, Address(klass, Klass::layout_helper_offset())); + __ lslvw(arr_size, length, t1); + __ ubfx(t1, t1, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask + 1)); + __ add(arr_size, arr_size, t1); + __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); + + __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andr(t1, t1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, t1); // body length + __ add(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t2); + __ membar(Assembler::StoreStore); + __ verify_oop(obj); + + __ ret(lr); + + __ bind(try_eden); + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ldrw(t1, Address(klass, Klass::layout_helper_offset())); + // since size is postive movw does right thing on 64bit + __ movw(arr_size, length); + __ lslvw(arr_size, length, t1); + __ ubfx(t1, t1, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask + 1)); + __ add(arr_size, arr_size, t1); + __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + __ incr_allocated_bytes(thread, arr_size, 0, rscratch1); + + __ initialize_header(obj, klass, length, t1, t2); + __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andr(t1, t1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, t1); // body length + __ add(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t2); + __ membar(Assembler::StoreStore); + __ verify_oop(obj); + + __ ret(lr); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0: new array + } + break; + + case new_multi_array_id: + { StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // r0,: klass + // r19,: rank + // r2: address of 1st dimension + OopMap* map = save_live_registers(sasm); + __ mov(c_rarg1, r0); + __ mov(c_rarg3, r2); + __ mov(c_rarg2, r19); + int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), r1, r2, r3); + + oop_maps = new OopMapSet(); + 
oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0,: new multi array + __ verify_oop(r0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(c_rarg0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = r5; + __ load_klass(t, r0); + __ ldrw(t, Address(t, Klass::access_flags_offset())); + __ tst(t, JVM_ACC_HAS_FINALIZER); + __ br(Assembler::NE, register_finalizer); + __ ret(lr); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), r0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ ret(lr); + } + break; + + case throw_class_cast_exception_id: + { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. 
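The calling-sequence comment above can be made a little more explicit: the caller pushes the subclass, then the superclass, branches to the stub, and the result is written back into the superclass slot (result_off == sup_k_off in the enum that follows), so the caller presumably reads the flag back from the stack after the call. A tiny C++ model of that stack contract only, not of the generated code; the is_subtype callback stands in for check_klass_subtype_slow_path:

  #include <cstdint>

  // slots[0] holds the subclass (pushed first, therefore deepest),
  // slots[1] holds the superclass; the result reuses the superclass slot.
  static bool slow_subtype_check_model(intptr_t* slots,
                                       bool (*is_subtype)(intptr_t sub, intptr_t super)) {
    bool ok = is_subtype(slots[0], slots[1]);
    slots[1] = ok ? 1 : 0;    // what the stores to result_off do in the stub below
    return ok;
  }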
+ enum layout { + r0_off, r0_off_hi, + r2_off, r2_off_hi, + r4_off, r4_off_hi, + r5_off, r5_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ push(RegSet::of(r0, r2, r4, r5), sp); + + // This is called by pushing args and not with C abi + // __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass + // __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass + + __ ldp(r4, r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); + + Label miss; + __ check_klass_subtype_slow_path(r4, r0, r2, r5, NULL, &miss); + + // fallthrough on success: + __ mov(rscratch1, 1); + __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + + __ bind(miss); + __ str(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: object + f.load_argument(0, r1); // r1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), r0, r1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(0, r0); // r0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), r0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case throw_range_check_failed_id: + { StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. 
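The monitorenter/monitorexit stubs above take their operands from stack slots written by the caller's store_parameter rather than from C ABI registers, and read them back with f.load_argument(). The counter_overflow case earlier shows the raw form of those loads (ldr from rfp plus 2 and 3 words), which suggests the arguments sit just above the saved rfp/lr pair; that offset is an inference, not something the quoted code defines, so the model below is an assumption:

  #include <cstdint>

  // Hedged model of StubFrame::load_argument(index, reg): argument slots are
  // assumed to start two words above the stub's saved rfp/lr pair.
  static inline intptr_t stub_argument(const intptr_t* rfp, int index) {
    return rfp[2 + index];   // index 0 -> rfp + 2*BytesPerWord, as in counter_overflow
  }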
+ generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + +#if INCLUDE_ALL_GCS + + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ mov(r0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); + __ should_not_reach_here(); + break; + } + + const Register pre_val = r0; + const Register thread = rthread; + const Register tmp = rscratch1; + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? 
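The answer to the question above, restated as plain C++ before the assembly that implements it: the SATB queue index counts down in bytes, zero means the buffer is full, and a successful enqueue stores the previous value at buffer + index. A sketch with stand-in names for the JavaThread/PtrQueue fields referenced above:

  #include <cstddef>

  struct SATBQueueModel {   // stand-in for the satb_mark_queue fields
    size_t index;           // byte offset into buffer; 0 means "no room left"
    void** buffer;
  };

  // Returns true if pre_val was logged locally; false means the stub must
  // fall back to the runtime call (SharedRuntime::g1_wb_pre), mirroring
  // the cbz(tmp, runtime) below.
  static bool satb_try_enqueue(SATBQueueModel* q, void* pre_val, size_t word_size) {
    if (q->index == 0) return false;                       // buffer full: slow path
    q->index -= word_size;                                 // sub(tmp, tmp, wordSize)
    *(void**)((char*)q->buffer + q->index) = pre_val;      // store at buffer + index
    return true;
  }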
+ __ ldr(tmp, queue_index); + __ cbz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ str(tmp, queue_index); + __ ldr(rscratch2, buffer); + __ add(tmp, tmp, rscratch2); + f.load_argument(0, rscratch2); + __ str(rscratch2, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + f.load_argument(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + } + break; + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + // arg0: store_address + Address store_addr(rfp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = rthread; + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + const Register card_offset = rscratch2; + // LR is free here, so we can use it to hold the byte_map_base. + const Register byte_map_base = lr; + + assert_different_registers(card_offset, byte_map_base, rscratch1); + + f.load_argument(0, card_offset); + __ lsr(card_offset, card_offset, CardTableModRefBS::card_shift); + __ load_byte_map_base(byte_map_base); + __ ldrb(rscratch1, Address(byte_map_base, card_offset)); + __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ br(Assembler::EQ, done); + + assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldrb(rscratch1, Address(byte_map_base, card_offset)); + __ cbzw(rscratch1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
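The card handling that follows is easier to read in plain C++ first. This is a sketch under the same assumptions the assembly makes (one byte per card, dirty_card_val is zero, young cards never need logging, and a StoreLoad fence separates the first check from the re-read); all names here are stand-ins:

  #include <atomic>
  #include <cstdint>

  // Returns the card byte to enqueue, or nullptr when nothing needs logging.
  static volatile uint8_t* g1_post_barrier_model(uintptr_t store_addr,
                                                 volatile uint8_t* byte_map_base,
                                                 unsigned card_shift,
                                                 uint8_t young_card_val) {
    volatile uint8_t* card = byte_map_base + (store_addr >> card_shift);
    if (*card == young_card_val) return nullptr;              // young region: done
    std::atomic_thread_fence(std::memory_order_seq_cst);      // the membar(StoreLoad)
    if (*card == 0) return nullptr;                           // already dirty (dirty == 0): done
    *card = 0;                                                // dirty the card (strb zr below)
    return card;                                              // caller enqueues it or calls g1_wb_post
  }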
+ __ strb(zr, Address(byte_map_base, card_offset)); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add(card_addr, byte_map_base, card_addr); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + // Reuse LR to hold buffer_addr + const Register buffer_addr = lr; + + __ ldr(buffer_addr, buffer); + __ str(card_addr, Address(buffer_addr, rscratch1)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + } + break; +#endif + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + + default: + { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ mov(r0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +// Simple helper to see if the caller of a runtime stub which +// entered the VM has been deoptimized + +static bool caller_is_deopted() { + JavaThread* thread = JavaThread::current(); + RegisterMap reg_map(thread, false); + frame runtime_frame = thread->last_frame(); + frame caller_frame = runtime_frame.sender(®_map); + assert(caller_frame.is_compiled_frame(), "must be compiled"); + return caller_frame.is_deoptimized_frame(); +} + +JRT_ENTRY(void, Runtime1::patch_code_aarch64(JavaThread* thread, Runtime1::StubID stub_id )) +{ + RegisterMap reg_map(thread, false); + + NOT_PRODUCT(_patch_code_slowcase_cnt++;) + // According to the ARMv8 ARM, "Concurrent modification and + // execution of instructions can lead to the resulting instruction + // performing any behavior that can be achieved by executing any + // sequence of instructions that can be executed from the same + // Exception level, except where the instruction before + // modification and the instruction after modification is a B, BL, + // NOP, BKPT, SVC, HVC, or SMC instruction." + // + // This effectively makes the games we play when patching + // impossible, so when we come across an access that needs + // patching we must deoptimize. + + if (TracePatching) { + tty->print_cr("Deoptimizing because patch is needed"); + } + + frame runtime_frame = thread->last_frame(); + frame caller_frame = runtime_frame.sender(®_map); + + // It's possible the nmethod was invalidated in the last + // safepoint, but if it's still alive then make it not_entrant. + nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); + if (nm != NULL) { + nm->make_not_entrant(); + } + + Deoptimization::deoptimize_frame(thread, caller_frame.id()); + + // Return to the now deoptimized frame. 
+} +JRT_END + +int Runtime1::access_field_patching(JavaThread* thread) { +// +// NOTE: we are still in Java +// + Thread* THREAD = thread; + debug_only(NoHandleMark nhm;) + { + // Enter VM mode + + ResetNoHandleMark rnhm; + patch_code_aarch64(thread, access_field_patching_id); + } + // Back in JAVA, use no oops DON'T safepoint + + // Return true if calling code is deoptimized + + return caller_is_deopted(); +JRT_END + + +int Runtime1::move_mirror_patching(JavaThread* thread) { +// +// NOTE: we are still in Java +// + Thread* THREAD = thread; + debug_only(NoHandleMark nhm;) + { + // Enter VM mode + + ResetNoHandleMark rnhm; + patch_code_aarch64(thread, load_mirror_patching_id); + } + // Back in JAVA, use no oops DON'T safepoint + + // Return true if calling code is deoptimized + + return caller_is_deopted(); +} + +int Runtime1::move_appendix_patching(JavaThread* thread) { +// +// NOTE: we are still in Java +// + Thread* THREAD = thread; + debug_only(NoHandleMark nhm;) + { + // Enter VM mode + + ResetNoHandleMark rnhm; + patch_code_aarch64(thread, load_appendix_patching_id); + } + // Back in JAVA, use no oops DON'T safepoint + + // Return true if calling code is deoptimized + + return caller_is_deopted(); +} + +int Runtime1::move_klass_patching(JavaThread* thread) { +// +// NOTE: we are still in Java +// + Thread* THREAD = thread; + debug_only(NoHandleMark nhm;) + { + // Enter VM mode + + ResetNoHandleMark rnhm; + patch_code_aarch64(thread, load_klass_patching_id); + } + // Back in JAVA, use no oops DON'T safepoint + + // Return true if calling code is deoptimized + + return caller_is_deopted(); +} + +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c1_globals_aarch64.hpp 2021-01-25 19:31:35.113459414 +0000 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP +#define CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, UseTLAB, true ); +define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false ); + +define_pd_global(intx, SafepointPollOffset, 0 ); + +#endif // CPU_AARCH64_VM_C1_GLOBALS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c2_globals_aarch64.hpp 2021-01-25 19:31:35.577464291 +0000 @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP +#define CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
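For readers new to these files: the define_pd_global entries below do not declare new flags, they only supply each platform's default for flags declared in the shared globals.hpp. Roughly, as a sketch of the product-build expansion (the authoritative macro lives in runtime/globals.hpp and also covers develop and debug builds):

  // Approximate shape of the macro these entries feed:
  #define define_pd_global(type, name, value) const type pd_##name = value
  typedef long intx;                                 // stand-in for HotSpot's intx
  define_pd_global(intx, CompileThreshold, 10000);   // -> const intx pd_CompileThreshold = 10000;
  // The shared flag table then picks up pd_CompileThreshold as the default
  // behind -XX:CompileThreshold.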
+ +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, trueInTiered); +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 64); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 25); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, true); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); + +define_pd_global(intx, ReservedCodeCacheSize, 48*M); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_AARCH64_VM_C2_GLOBALS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/c2_init_aarch64.cpp 2021-01-25 19:31:36.015468895 +0000 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for i486 + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); + // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could + // simply be left out. +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/codeBuffer_aarch64.hpp 2021-01-25 19:31:36.429473246 +0000 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP +#define CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/compiledIC_aarch64.cpp 2021-01-25 19:31:36.864477818 +0000 @@ -0,0 +1,153 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// Release the CompiledICHolder* associated with this call site is there is one. 
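A note on the to-interpreter stub emitted by emit_to_interp_stub() further below: its size budget comes from worst-case instruction counts for the two constant materializations plus the final branch. The quoted code only fixes the total at 7 instructions; the breakdown here is a plausible accounting, not something the patch states:

  //   mov_metadata(rmethod, <Method*>)   assumed up to 3 instructions (movz/movk/movk)
  //   movptr(rscratch1, <c2i entry>)     assumed up to 3 instructions
  //   br(rscratch1)                      1 instruction
  static const int kToInterpStubInsns = 3 + 3 + 1;   // matches 7 * NativeInstruction::instruction_size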
+void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + // Stub is fixed up when the corresponding call is converted from + // calling compiled code to calling interpreted code. + // movq rmethod, 0 + // jmp -4 # to self + + // address mark = cbuf.insts_mark(); // Get mark within main instrs section. + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(to_interp_stub_size()); + int offset = __ offset(); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed + } + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark)); + // static stub relocation also tags the Method* in the code-stream. + __ mov_metadata(rmethod, (Metadata*)NULL); + __ movptr(rscratch1, 0); + __ br(rscratch1); + + assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return 7 * NativeInstruction::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 4; // 3 in emit_to_interp_stub + 1 in emit_call +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); +#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(method_holder->data() == 0 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); +#endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); + ICache::invalidate_range(stub, to_interp_stub_size()); + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. 
+ address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + method_holder->set_data(0); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/copy_aarch64.hpp 2021-01-25 19:31:37.285482243 +0000 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_COPY_AARCH64_HPP +#define CPU_AARCH64_VM_COPY_AARCH64_HPP + +// Inline functions for memory copy and fill. + +// Contains inline asm implementations +#ifdef TARGET_OS_ARCH_linux_aarch64 +# include "copy_linux_aarch64.inline.hpp" +#endif + + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_AARCH64_VM_COPY_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp 2021-01-25 19:31:37.717486784 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP +#define CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP + + protected: + + void generate_more_monitors(); + void generate_deopt_handling(); + +#endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/debug_aarch64.cpp 2021-01-25 19:31:38.157491408 +0000 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +void pd_ps(frame f) {} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/depChecker_aarch64.cpp 2021-01-25 19:31:38.566495707 +0000 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_aarch64.hpp" + +// Nothing to do on aarch64 --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/depChecker_aarch64.hpp 2021-01-25 19:31:39.011500384 +0000 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP +#define CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP + +// Nothing to do on aarch64 + +#endif // CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/disassembler_aarch64.hpp 2021-01-25 19:31:39.460505103 +0000 @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP +#define CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP + + static int pd_instruction_alignment() { + return 1; + } + + static const char* pd_cpu_opts() { + return ""; + } + +#endif // CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/frame_aarch64.cpp 2021-01-25 19:31:39.880509518 +0000 @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/os.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_aarch64.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // When we are running interpreted code the machine stack pointer, SP, is + // set low enough so that the Java expression stack can grow and shrink + // without ever exceeding the machine stack bounds. So, ESP >= SP. + + // When we call out of an interpreted method, SP is incremented so that + // the space between SP and ESP is removed. The SP saved in the callee's + // frame is the SP *before* this increment. So, when we walk a stack of + // interpreter frames the sender's SP saved in a frame might be less than + // the SP at the point of call. 
+ + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + + bool unextended_sp_safe = (unextended_sp < thread->stack_base()); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // to construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + + if (!fp_safe) return false; + + // Validate the JavaCallWrapper an entry frame must have + + address jcw = (address)entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > fp); + + return jcw_safe; + + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + if (sender_blob->is_nmethod()) { + nmethod* nm = sender_blob->as_nmethod_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + return false; + } + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_nmethod(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_nmethod()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... + + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. 
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. 
+ if (!jfa->walkable()) { + // Capture _last_Java_pc (if needed) and mark anchor walkable. + jfa->capture_last_Java_pc(); + } + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + assert(jfa->last_Java_pc() != NULL, "not walkable"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; +} + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); + if (sender_nm != NULL) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (sender_nm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp)); + _unextended_sp = _fp; + } + else if (sender_nm->is_deopt_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); + } + else if (sender_nm->is_method_handle_return(_pc)) { + _unextended_sp = _fp; + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(rfp->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(rfp->as_VMReg()->next(), (address) link_addr); + } +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_interpreter_frame +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). 
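A concrete, hypothetical illustration of why the two stack pointers can differ at this point:

  // Hypothetical numbers: the callee has size_of_parameters() == 2 but
  // max_locals() == 5, so the interpreter (or a c2i adapter) extended the
  // frame by (5 - 2) * Interpreter::stackElementWords slots beyond what the
  // caller itself set up. The caller's own sp was saved at
  // interpreter_frame_sender_sp_offset and is what
  // interpreter_frame_sender_sp() returns below as the unextended sp;
  // interpreter_frame_equals_unpacked_fp() later in this file applies the
  // same (max_locals - size_of_parameters) delta.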
+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif // COMPILER2 + + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_compiled_frame +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + // we cannot rely upon the last fp having been saved to the thread + // in C2 code but it will have been pushed onto the stack. so we + // have to find it relative to the unextended sp + + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = l_sender_sp; + + // the return_address is always the word on the stack + address sender_pc = (address) *(l_sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); + + // assert (sender_sp() == l_sender_sp, "should be"); + // assert (*saved_fp_addr == link(), "should be"); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +//------------------------------------------------------------------------------ +// frame::sender +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) + return sender_for_entry_frame(map); + if (is_interpreted_frame()) + return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + // This test looks odd: why is it not is_compiled_frame() ? That's + // because stubs also have OOP maps. + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
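+ // Such frames are assumed to keep the conventional fp/lr chain, so the saved
+ // frame pointer (link()) and return address (sender_pc()) are enough to
+ // describe the sender.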
+ return frame(sender_sp(), link(), sender_pc()); +} + +bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { + assert(is_interpreted_frame(), "must be interpreter frame"); + Method* method = interpreter_frame_method(); + // When unpacking an optimized frame the frame pointer is + // adjusted with: + int diff = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + return _fp == (fp - diff); +} + +void frame::pd_gc_epilog() { + // nothing done here now +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcx + + intptr_t bcx = interpreter_frame_bcx(); + if (m->validate_bci_from_bcx(bcx) < 0) { + return false; + } + + // validate constantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the + // interpreterState object + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // TODO : ensure AARCH64 does the same as Intel here i.e. push v0 then r0 + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved before EAX/EDX. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + // This is times two because we do a push(ltos) after pushing XMM0 + // and that takes two interpreter stack slots. 
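+ // (XMM0/ST0 above are x86 terms; the AArch64 analogue is v0 -- see the
+ // TODO above -- but the same two-slot adjustment is assumed here.)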
+ tos_addr += 2 * Interpreter::stackElementWords; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? (oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : { + value_result->f = *(jfloat*)tos_addr; + break; + } + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // Not used on aarch64, but we must return something. + return NULL; +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#undef DESCRIBE_FP_OFFSET + +#define DESCRIBE_FP_OFFSET(name) \ + { \ + unsigned long *p = (unsigned long *)fp; \ + printf("0x%016lx 0x%016lx %s\n", (unsigned long)(p + frame::name##_offset), \ + p[frame::name##_offset], #name); \ + } + +static __thread unsigned long nextfp; +static __thread unsigned long nextpc; +static __thread unsigned long nextsp; +static __thread RegisterMap *reg_map; + +static void printbc(Method *m, intptr_t bcx) { + const char *name; + char buf[16]; + if (m->validate_bci_from_bcx(bcx) < 0 + || !m->contains((address)bcx)) { + name = "???"; + snprintf(buf, sizeof buf, "(bad)"); + } else { + int bci = m->bci_from((address)bcx); + snprintf(buf, sizeof buf, "%d", bci); + name = Bytecodes::name(m->code_at(bci)); + } + ResourceMark rm; + printf("%s : %s ==> %s\n", m->name_and_sig_as_C_string(), buf, name); +} + +void internal_pf(unsigned long sp, unsigned long fp, unsigned long pc, unsigned long bcx) { + if (! 
fp) + return; + + DESCRIBE_FP_OFFSET(return_addr); + DESCRIBE_FP_OFFSET(link); + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + unsigned long *p = (unsigned long *)fp; + + // We want to see all frames, native and Java. For compiled and + // interpreted frames we have special information that allows us to + // unwind them; for everything else we assume that the native frame + // pointer chain is intact. + frame this_frame((intptr_t*)sp, (intptr_t*)fp, (address)pc); + if (this_frame.is_compiled_frame() || + this_frame.is_interpreted_frame()) { + frame sender = this_frame.sender(reg_map); + nextfp = (unsigned long)sender.fp(); + nextpc = (unsigned long)sender.pc(); + nextsp = (unsigned long)sender.unextended_sp(); + } else { + nextfp = p[frame::link_offset]; + nextpc = p[frame::return_addr_offset]; + nextsp = (unsigned long)&p[frame::sender_sp_offset]; + } + + if (bcx == -1ul) + bcx = p[frame::interpreter_frame_bcx_offset]; + + if (Interpreter::contains((address)pc)) { + Method* m = (Method*)p[frame::interpreter_frame_method_offset]; + if(m && m->is_method()) { + printbc(m, bcx); + } else + printf("not a Method\n"); + } else { + CodeBlob *cb = CodeCache::find_blob((address)pc); + if (cb != NULL) { + if (cb->is_nmethod()) { + ResourceMark rm; + nmethod* nm = (nmethod*)cb; + printf("nmethod %s\n", nm->method()->name_and_sig_as_C_string()); + } else if (cb->name()) { + printf("CodeBlob %s\n", cb->name()); + } + } + } +} + +extern "C" void npf() { + CodeBlob *cb = CodeCache::find_blob((address)nextpc); + // C2 does not always chain the frame pointers when it can, instead + // preferring to use fixed offsets from SP, so a simple leave() does + // not work. Instead, it adds the frame size to SP then pops FP and + // LR. We have to do the same thing to get a good call chain. + if (cb && cb->frame_size()) + nextfp = nextsp + wordSize * (cb->frame_size() - 2); + internal_pf (nextsp, nextfp, nextpc, -1); +} + +extern "C" void pf(unsigned long sp, unsigned long fp, unsigned long pc, + unsigned long bcx, unsigned long thread) { + if (!reg_map) { + reg_map = NEW_C_HEAP_OBJ(RegisterMap, mtNone); + ::new (reg_map) RegisterMap((JavaThread*)thread, false); + } else { + *reg_map = RegisterMap((JavaThread*)thread, false); + } + + { + CodeBlob *cb = CodeCache::find_blob((address)pc); + if (cb && cb->frame_size()) + fp = sp + wordSize * (cb->frame_size() - 2); + } + internal_pf(sp, fp, pc, bcx); +} + +// support for printing out where we are in a Java method +// needs to be passed current fp and bcp register values +// prints method name, bc index and bytecode name +extern "C" void pm(unsigned long fp, unsigned long bcx) { + DESCRIBE_FP_OFFSET(interpreter_frame_method); + unsigned long *p = (unsigned long *)fp; + Method* m = (Method*)p[frame::interpreter_frame_method_offset]; + printbc(m, bcx); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif + +void JavaFrameAnchor::make_walkable(JavaThread* thread) { + // last frame set? + if (last_Java_sp() == NULL) return; + // already walkable? 
+ if (walkable()) return; + assert(Thread::current() == (Thread*)thread, "not current thread"); + assert(last_Java_sp() != NULL, "not called from Java code?"); + assert(last_Java_pc() == NULL, "already walkable"); + capture_last_Java_pc(); + assert(walkable(), "something went wrong"); +} + +void JavaFrameAnchor::capture_last_Java_pc() { + assert(_last_Java_sp != NULL, "no last frame set"); + assert(_last_Java_pc == NULL, "already walkable"); + _last_Java_pc = (address)_last_Java_sp[-1]; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/frame_aarch64.hpp 2021-01-25 19:31:40.363514594 +0000 @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_FRAME_AARCH64_HPP +#define CPU_AARCH64_VM_FRAME_AARCH64_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp + +// [monitors[0] ] \ +// ... | monitor block size = k +// [monitors[k-1] ] / +// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset +// [byte code index/pointr] = bcx() bcx_offset + +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset + +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset + +// [old frame pointer ] <- fp = link() +// [return pc ] + +// [last sp ] +// [oop temp ] (only for native calls) + +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current esp/rsp) +// [local variables ] BytecodeInterpreter::run local variables +// ... 
BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's ebp/rbp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + sender_sp_offset = 2, + +#ifndef CC_INTERP + + // Interpreter frames + interpreter_frame_oop_temp_offset = 3, // for native calls only + + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_bcx_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + +#endif // CC_INTERP + + // Entry frames + // n.b. these values are determined by the layout defined in + // stubGenerator for the Java call stub + entry_frame_after_call_words = 27, + entry_frame_call_wrapper_offset = -8, + + // we don't need a save area + arg_reg_save_area_bytes = 0, + + // TODO - check that this is still correct + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. 
However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. + + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } + +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + // Note: not necessarily the real 'frame pointer' (see real_fp) + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // return address of param, zero origin index. + inline address* native_param_addr(int idx) const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved RBP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#ifndef CC_INTERP + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // CC_INTERP + +#ifdef CC_INTERP + inline interpreterState get_interpreterState() const; +#endif // CC_INTERP + +#endif // CPU_AARCH64_VM_FRAME_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/frame_aarch64.inline.hpp 2021-01-25 19:31:40.818519377 +0000 @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP +#define CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP + +#include "code/codeCache.hpp" + +// Inline functions for AArch64 frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +//static int spin; + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod"); + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
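+// Because the stack grows towards lower addresses, a numerically smaller id
+// denotes a younger (more recently pushed) frame; is_younger() and is_older()
+// below rely on that ordering.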
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +// return address of param, zero origin index. +inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return ((interpreterState)addr_at( -((int)sizeof(BytecodeInterpreter))/wordSize )); +} + +inline intptr_t* frame::sender_sp() const { + // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? + if (is_interpreted_frame()) { + assert(false, "should never happen"); + return get_interpreterState()->sender_sp(); + } else { + return addr_at(sender_sp_offset); + } +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_locals); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_bcp); +} + + +// Constant pool cache + +inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_constants); +} + +// Method + +inline methodOop* frame::interpreter_frame_method_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_method); +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_mdx); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + assert(is_interpreted_frame(), "wrong frame type"); + return get_interpreterState()->_stack + 1; +} + +#else /* asm interpreter */ +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcx_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdx_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** 
frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL) { + return sp(); + } else { + // sp() may have been extended or shrunk by an adapter. At least + // check that we don't fall behind the legal region. + // For top deoptimized frame last_sp == interpreter_frame_monitor_end. + assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +#endif /* CC_INTERP */ + +inline int frame::pd_oop_map_offset_adjustment() const { + return 0; +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + + +// Compiled frames + +inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - local_index + (local_index < nof_args ? 1: -1)); +} + +inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); +} + +inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); +} + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(r0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + + return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(r0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + + *result_adr = obj; +} + +#endif // CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp 2021-01-25 19:31:41.249523907 +0000 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP +#define CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP + +const int StackAlignmentInBytes = 16; + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +// The maximum B/BL offset range on AArch64 is 128MB. +#undef CODE_CACHE_DEFAULT_LIMIT +#define CODE_CACHE_DEFAULT_LIMIT (128*M) + +#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/globals_aarch64.hpp 2021-01-25 19:31:41.676528395 +0000 @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_GLOBALS_AARCH64_HPP +#define CPU_AARCH64_VM_GLOBALS_AARCH64_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, CountInterpCalls, true); +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +// See 4827828 for this change. There is no globals_core_i486.hpp. I can't +// assign a different value for C2 without touching a number of files. Use +// #ifdef to minimize the change as it's late in Mantis. -- FIXME. 
+// c1 doesn't have this problem because the fix to 4858033 assures us +// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns +// the uep and the vep doesn't get real alignment but just slops on by +// only assured that the entry instruction meets the 5 byte size requirement. +define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); + +define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5)); + +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, UseMembar, true); + +define_pd_global(bool, PreserveFramePointer, false); + +// GC Ergo Flags +define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +#if defined(COMPILER1) || defined(COMPILER2) +define_pd_global(intx, InlineSmallCode, 1000); +#endif + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + product(bool, UseNeon, false, \ + "Use Neon for CRC32 computation") \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ + product(bool, UseSIMDForMemoryOps, false, \ + "Use SIMD instructions in generated memory move code") \ + product(bool, AvoidUnalignedAccesses, false, \ + "Avoid generating unaligned memory accesses") \ + product(bool, UseBlockZeroing, true, \ + "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") + +#endif // CPU_AARCH64_VM_GLOBALS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/icBuffer_aarch64.cpp 2021-01-25 19:31:42.091532756 +0000 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +int InlineCacheBuffer::ic_stub_code_size() { + return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size; +} + +#define __ masm-> + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded value, we do not need reloc info + // because + // (1) the value is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop"); + + address start = __ pc(); + Label l; + + __ ldr(rscratch2, l); + __ far_jump(ExternalAddress(entry_point)); + __ bind(l); + __ emit_int64((int64_t)cached_value); + ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); + assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(code_begin + 4); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // The word containing the cached value is at the end of this IC buffer + uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); + void* o = (void*)*p; + return o; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/icache_aarch64.cpp 2021-01-25 19:31:42.519537255 +0000 @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/icache.hpp" + +extern void aarch64TestHook(); + +void ICacheStubGenerator::generate_icache_flush( + ICache::flush_icache_stub_t* flush_icache_stub) { + // Give anyone who calls this a surprise + *flush_icache_stub = (ICache::flush_icache_stub_t)NULL; +} + +void ICache::initialize() { +#ifdef ASSERT + aarch64TestHook(); +#endif +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/icache_aarch64.hpp 2021-01-25 19:31:42.974542037 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_ICACHE_AARCH64_HPP +#define CPU_AARCH64_VM_ICACHE_AARCH64_HPP + +// Interface for updating the instruction cache. Whenever the VM +// modifies code, part of the processor instruction cache potentially +// has to be flushed. + +class ICache : public AbstractICache { + public: + static void initialize(); + static void invalidate_word(address addr) { + __clear_cache((char *)addr, (char *)(addr + 3)); + } + static void invalidate_range(address start, int nbytes) { + __clear_cache((char *)start, (char *)(start + nbytes)); + } +}; + +#endif // CPU_AARCH64_VM_ICACHE_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/immediate_aarch64.cpp 2021-01-25 19:31:43.442546956 +0000 @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#include +#include "immediate_aarch64.hpp" + +// there are at most 2^13 possible logical immediate encodings +// however, some combinations of immr and imms are invalid +static const unsigned LI_TABLE_SIZE = (1 << 13); + +static int li_table_entry_count; + +// for forward lookup we just use a direct array lookup +// and assume that the cient has supplied a valid encoding +// table[encoding] = immediate +static u_int64_t LITable[LI_TABLE_SIZE]; + +// for reverse lookup we need a sparse map so we store a table of +// immediate and encoding pairs sorted by immediate value + +struct li_pair { + u_int64_t immediate; + u_int32_t encoding; +}; + +static struct li_pair InverseLITable[LI_TABLE_SIZE]; + +// comparator to sort entries in the inverse table +int compare_immediate_pair(const void *i1, const void *i2) +{ + struct li_pair *li1 = (struct li_pair *)i1; + struct li_pair *li2 = (struct li_pair *)i2; + if (li1->immediate < li2->immediate) { + return -1; + } + if (li1->immediate > li2->immediate) { + return 1; + } + return 0; +} + +// helper functions used by expandLogicalImmediate + +// for i = 1, ... N result = 1 other bits are zero +static inline u_int64_t ones(int N) +{ + return (N == 64 ? (u_int64_t)-1UL : ((1UL << N) - 1)); +} + +/* + * bit twiddling helpers for instruction decode + */ + +// 32 bit mask with bits [hi,...,lo] set +static inline u_int32_t mask32(int hi = 31, int lo = 0) +{ + int nbits = (hi + 1) - lo; + return ((1 << nbits) - 1) << lo; +} + +static inline u_int64_t mask64(int hi = 63, int lo = 0) +{ + int nbits = (hi + 1) - lo; + return ((1L << nbits) - 1) << lo; +} + +// pick bits [hi,...,lo] from val +static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0) +{ + return (val & mask32(hi, lo)); +} + +// pick bits [hi,...,lo] from val +static inline u_int64_t pick64(u_int64_t val, int hi = 31, int lo = 0) +{ + return (val & mask64(hi, lo)); +} + +// mask [hi,lo] and shift down to start at bit 0 +static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0) +{ + return (pick32(val, hi, lo) >> lo); +} + +// mask [hi,lo] and shift down to start at bit 0 +static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0) +{ + return (pick64(val, hi, lo) >> lo); +} + +// result<0> to val +static inline u_int64_t pickbit(u_int64_t val, int N) +{ + return pickbits64(val, N, N); +} + +static inline u_int32_t uimm(u_int32_t val, int hi, int lo) +{ + return pickbits32(val, hi, lo); +} + +// SPEC bits(M*N) Replicate(bits(M) x, integer N); +// this is just an educated guess + +u_int64_t replicate(u_int64_t bits, int nbits, int count) +{ + u_int64_t result = 0; + // nbits may be 64 in which case we want mask to be -1 + u_int64_t mask = ones(nbits); + for (int i = 0; i < count ; i++) { + result <<= nbits; + result |= (bits & mask); + } + return result; +} + +// this function writes the supplied bimm reference and returns a +// boolean to indicate success (1) or fail (0) because an illegal +// encoding must be treated as an UNALLOC instruction + +// construct a 32 bit immediate value for a logical immediate operation +int expandLogicalImmediate(u_int32_t immN, u_int32_t immr, + u_int32_t imms, u_int64_t &bimm) +{ + int len; // ought to be <= 6 + u_int32_t levels; // 6 bits + u_int32_t tmask_and; // 6 bits + u_int32_t wmask_and; // 6 bits + u_int32_t tmask_or; // 6 bits + u_int32_t wmask_or; // 6 bits + u_int64_t imm64; // 64 bits + u_int64_t tmask, wmask; // 64 bits + u_int32_t S, R, diff; // 6 bits? 
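+
+ // Worked example (illustrative): immN:immr:imms = 0:000000:111100 gives
+ // len = 1, levels = 1, S = 0, R = 0, i.e. a 2-bit element containing a
+ // single set bit with no rotation; replicated across 64 bits this yields
+ // the immediate 0x5555555555555555.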
+ + if (immN == 1) { + len = 6; // looks like 7 given the spec above but this cannot be! + } else { + len = 0; + u_int32_t val = (~imms & 0x3f); + for (int i = 5; i > 0; i--) { + if (val & (1 << i)) { + len = i; + break; + } + } + if (len < 1) { + return 0; + } + // for valid inputs leading 1s in immr must be less than leading + // zeros in imms + int len2 = 0; // ought to be < len + u_int32_t val2 = (~immr & 0x3f); + for (int i = 5; i > 0; i--) { + if (!(val2 & (1 << i))) { + len2 = i; + break; + } + } + if (len2 >= len) { + return 0; + } + } + + levels = (1 << len) - 1; + + if ((imms & levels) == levels) { + return 0; + } + + S = imms & levels; + R = immr & levels; + + // 6 bit arithmetic! + diff = S - R; + tmask_and = (diff | ~levels) & 0x3f; + tmask_or = (diff & levels) & 0x3f; + tmask = 0xffffffffffffffffULL; + + for (int i = 0; i < 6; i++) { + int nbits = 1 << i; + u_int64_t and_bit = pickbit(tmask_and, i); + u_int64_t or_bit = pickbit(tmask_or, i); + u_int64_t and_bits_sub = replicate(and_bit, 1, nbits); + u_int64_t or_bits_sub = replicate(or_bit, 1, nbits); + u_int64_t and_bits_top = (and_bits_sub << nbits) | ones(nbits); + u_int64_t or_bits_top = (0 << nbits) | or_bits_sub; + + tmask = ((tmask + & (replicate(and_bits_top, 2 * nbits, 32 / nbits))) + | replicate(or_bits_top, 2 * nbits, 32 / nbits)); + } + + wmask_and = (immr | ~levels) & 0x3f; + wmask_or = (immr & levels) & 0x3f; + + wmask = 0; + + for (int i = 0; i < 6; i++) { + int nbits = 1 << i; + u_int64_t and_bit = pickbit(wmask_and, i); + u_int64_t or_bit = pickbit(wmask_or, i); + u_int64_t and_bits_sub = replicate(and_bit, 1, nbits); + u_int64_t or_bits_sub = replicate(or_bit, 1, nbits); + u_int64_t and_bits_top = (ones(nbits) << nbits) | and_bits_sub; + u_int64_t or_bits_top = (or_bits_sub << nbits) | 0; + + wmask = ((wmask + & (replicate(and_bits_top, 2 * nbits, 32 / nbits))) + | replicate(or_bits_top, 2 * nbits, 32 / nbits)); + } + + if (diff & (1U << 6)) { + imm64 = tmask & wmask; + } else { + imm64 = tmask | wmask; + } + + + bimm = imm64; + return 1; +} + +// constructor to initialise the lookup tables + +static void initLITables() __attribute__ ((constructor)); +static void initLITables() +{ + li_table_entry_count = 0; + for (unsigned index = 0; index < LI_TABLE_SIZE; index++) { + u_int32_t N = uimm(index, 12, 12); + u_int32_t immr = uimm(index, 11, 6); + u_int32_t imms = uimm(index, 5, 0); + if (expandLogicalImmediate(N, immr, imms, LITable[index])) { + InverseLITable[li_table_entry_count].immediate = LITable[index]; + InverseLITable[li_table_entry_count].encoding = index; + li_table_entry_count++; + } + } + // now sort the inverse table + qsort(InverseLITable, li_table_entry_count, + sizeof(InverseLITable[0]), compare_immediate_pair); +} + +// public APIs provided for logical immediate lookup and reverse lookup + +u_int64_t logical_immediate_for_encoding(u_int32_t encoding) +{ + return LITable[encoding]; +} + +u_int32_t encoding_for_logical_immediate(u_int64_t immediate) +{ + struct li_pair pair; + struct li_pair *result; + + pair.immediate = immediate; + + result = (struct li_pair *) + bsearch(&pair, InverseLITable, li_table_entry_count, + sizeof(InverseLITable[0]), compare_immediate_pair); + + if (result) { + return result->encoding; + } + + return 0xffffffff; +} + +// floating point immediates are encoded in 8 bits +// fpimm[7] = sign bit +// fpimm[6:4] = signed exponent +// fpimm[3:0] = fraction (assuming leading 1) +// i.e. 
F = s * 1.f * 2^(e - b) + +u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp) +{ + union { + float fpval; + double dpval; + u_int64_t val; + }; + + u_int32_t s, e, f; + s = (imm8 >> 7 ) & 0x1; + e = (imm8 >> 4) & 0x7; + f = imm8 & 0xf; + // the fp value is s * n/16 * 2r where n is 16+e + fpval = (16.0 + f) / 16.0; + // n.b. exponent is signed + if (e < 4) { + int epos = e; + for (int i = 0; i <= epos; i++) { + fpval *= 2.0; + } + } else { + int eneg = 7 - e; + for (int i = 0; i < eneg; i++) { + fpval /= 2.0; + } + } + + if (s) { + fpval = -fpval; + } + if (is_dp) { + dpval = (double)fpval; + } + return val; +} + +u_int32_t encoding_for_fp_immediate(float immediate) +{ + // given a float which is of the form + // + // s * n/16 * 2r + // + // where n is 16+f and imm1:s, imm4:f, simm3:r + // return the imm8 result [s:r:f] + // + + union { + float fpval; + u_int32_t val; + }; + fpval = immediate; + u_int32_t s, r, f, res; + // sign bit is 31 + s = (val >> 31) & 0x1; + // exponent is bits 30-23 but we only want the bottom 3 bits + // strictly we ought to check that the bits bits 30-25 are + // either all 1s or all 0s + r = (val >> 23) & 0x7; + // fraction is bits 22-0 + f = (val >> 19) & 0xf; + res = (s << 7) | (r << 4) | f; + return res; +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/immediate_aarch64.hpp 2021-01-25 19:31:43.892551686 +0000 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef _IMMEDIATE_H +#define _IMMEDIATE_H + +#include + +/* + * functions to map backwards and forwards between logical or floating + * point immediates and their corresponding encodings. the mapping + * from encoding to immediate is required by the simulator. the reverse + * mapping is required by the OpenJDK assembler. + * + * a logical immediate value supplied to or returned from a map lookup + * is always 64 bits. this is sufficient for looking up 32 bit + * immediates or their encodings since a 32 bit immediate has the same + * encoding as the 64 bit immediate produced by concatenating the + * immediate with itself. + * + * a logical immediate encoding is 13 bits N:immr:imms (3 fields of + * widths 1:6:6 -- see the arm spec). they appear as bits [22:10] of a + * logical immediate instruction. encodings are supplied and returned + * as 32 bit values. if a given 13 bit immediate has no corresponding + * encoding then a map lookup will return 0xffffffff. 
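+ *
+ * for example, encoding_for_logical_immediate(0x5555555555555555) returns
+ * 0x03c (N:immr:imms = 0:000000:111100), and
+ * logical_immediate_for_encoding(0x03c) maps that encoding back to the same
+ * 64 bit immediate.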
+ */ + +u_int64_t logical_immediate_for_encoding(u_int32_t encoding); +u_int32_t encoding_for_logical_immediate(u_int64_t immediate); +u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp); +u_int32_t encoding_for_fp_immediate(float immediate); + +#endif // _IMMEDIATE_H --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interp_masm_aarch64.cpp 2021-01-25 19:31:44.348556479 +0000 @@ -0,0 +1,1730 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_aarch64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + + +void InterpreterMacroAssembler::narrow(Register result) { + + // Get method->_constMethod->_result_type + ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + ldr(rscratch1, Address(rscratch1, Method::const_offset())); + ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + cmpw(rscratch1, T_INT); + br(Assembler::EQ, done); + + // mask integer result to narrower return type. + cmpw(rscratch1, T_BOOLEAN); + br(Assembler::NE, notBool); + andw(result, result, 0x1); + b(done); + + bind(notBool); + cmpw(rscratch1, T_BYTE); + br(Assembler::NE, notByte); + sbfx(result, result, 0, 8); + b(done); + + bind(notByte); + cmpw(rscratch1, T_CHAR); + br(Assembler::NE, notChar); + ubfx(result, result, 0, 16); // truncate upper 16 bits + b(done); + + bind(notChar); + sbfx(result, result, 0, 16); // sign-extend short + + // Nothing to do for T_INT + bind(done); +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. 
+ // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. + ldrw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + tstw(rscratch1, JavaThread::popframe_pending_bit); + br(Assembler::EQ, L); + tstw(rscratch1, JavaThread::popframe_processing_bit); + br(Assembler::NE, L); + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + br(r0); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + ldr(r2, Address(rthread, JavaThread::jvmti_thread_state_offset())); + const Address tos_addr(r2, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(r2, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(r2, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case atos: ldr(r0, oop_addr); + str(zr, oop_addr); + verify_oop(r0, state); break; + case ltos: ldr(r0, val_addr); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ldrw(r0, val_addr); break; + case ftos: ldrs(v0, val_addr); break; + case dtos: ldrd(v0, val_addr); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + movw(rscratch1, (int) ilgl); + strw(rscratch1, tos_addr); + strw(zr, val_addr); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + cbz(rscratch1, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_state_offset())); + cmpw(rscratch1, JvmtiThreadState::earlyret_pending); + br(Assembler::NE, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
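Both check_and_handle_popframe above and this early-return check follow the same shape: read a JVMTI flag from the current thread, bail out unless the request is pending and not already being processed, then branch into a generated remove-activation entry via br(r0). A toy model of that flag protocol (the structs and names below are illustrative stand-ins, not HotSpot types):

#include <stddef.h>

// toy stand-ins for JavaThread / JvmtiThreadState, illustration only
struct ToyJvmtiState { int earlyret_state; int earlyret_tos; };
struct ToyThread     { ToyJvmtiState* jvmti_state; };
enum { toy_earlyret_pending = 1 };

// Returns the tos state to hand to the generated entry, or -1 to fall through,
// mirroring the cbz / cmpw / br(NE) checks in the assembler code below.
static int check_and_handle_earlyret_model(const ToyThread* t) {
  if (t->jvmti_state == NULL) return -1;
  if (t->jvmti_state->earlyret_state != toy_earlyret_pending) return -1;
  return t->jvmti_state->earlyret_tos;
}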
+ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), rscratch1); + br(r0); + bind(L); + } +} + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp( + Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ldrh(reg, Address(rbcp, bcp_offset)); + rev16(reg, reg); +} + +void InterpreterMacroAssembler::get_dispatch() { + unsigned long offset; + adrp(rdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); + lea(rdispatch, Address(rdispatch, offset)); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + load_unsigned_short(index, Address(rbcp, bcp_offset)); + } else if (index_size == sizeof(u4)) { + assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + ldrw(index, Address(rbcp, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + eonw(index, index, zr); // convert to plain index + } else if (index_size == sizeof(u1)) { + load_unsigned_byte(index, Address(rbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Return +// Rindex: index into constant pool +// Rcache: address of cache entry - ConstantPoolCache::base_offset() +// +// A caller must add ConstantPoolCache::base_offset() to Rcache to get +// the true address of the cache entry. +// +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + assert_different_registers(cache, rcpool); + get_cache_index_at_bcp(index, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry + // aarch64 already has the cache in rcpool so there is no need to + // install it in cache. instead we pre-add the indexed offset to + // rcpool and return it in cache. All clients of this method need to + // be modified accordingly. + add(cache, rcpool, index, Assembler::LSL, 5); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. + // n.b. 
unlike x86 cache already includes the index offset + lea(bytecode, Address(cache, + ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + ldarw(bytecode, bytecode); + const int shift_count = (1 + byte_no) * BitsPerByte; + ubfx(bytecode, bytecode, shift_count, BitsPerByte); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + ldr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + add(cache, cache, tmp, Assembler::LSL, 2 + LogBytesPerWord); // construct pointer to cache entry +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ldr(mcs, Address(method, Method::method_counters_offset())); + cbnz(mcs, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ldr(mcs, Address(method, Method::method_counters_offset())); + cbz(mcs, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + Register tmp = index; // reuse + lslw(tmp, tmp, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ldr(result, Address(result, ConstantPool::resolved_references_offset_in_bytes())); + // JNIHandles::resolve(obj); + ldr(result, Address(result, 0)); + // Add in the index + add(result, result, tmp); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. +// +// Args: +// r0: superklass +// Rsub_klass: subklass +// +// Kills: +// r2, r5 +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Label& ok_is_subtype) { + assert(Rsub_klass != r0, "r0 holds superklass"); + assert(Rsub_klass != r2, "r2 holds 2ndary super array length"); + assert(Rsub_klass != r5, "r5 holds 2ndary super array scan ptr"); + + // Profile the not-null value's klass. + profile_typecheck(r2, Rsub_klass, r5); // blows r2, reloads r5 + + // Do the check. + check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2 + + // Profile the failure of the check. 
+ profile_typecheck_failed(r2); // blows r2 +} + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ldr(r, post(esp, wordSize)); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + ldrw(r, post(esp, wordSize)); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ldr(r, post(esp, 2 * Interpreter::stackElementSize)); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + str(r, pre(esp, -wordSize)); + } + +void InterpreterMacroAssembler::push_i(Register r) { + str(r, pre(esp, -wordSize)); +} + +void InterpreterMacroAssembler::push_l(Register r) { + str(r, pre(esp, 2 * -wordSize)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + ldrs(r, post(esp, wordSize)); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + ldrd(r, post(esp, 2 * Interpreter::stackElementSize)); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + strs(r, pre(esp, -wordSize)); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + strd(r, pre(esp, 2* -wordSize)); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(r0, state); +} + +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(r0, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ldr(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + str(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { + // set sender sp + mov(r13, sp); + // record last_sp + str(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + prepare_to_jump_from_interpreted(); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + ldr(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset())); + cbz(rscratch1, run_compiled_code); + ldr(rscratch1, Address(method, Method::interpreter_entry_offset())); + br(rscratch1); + bind(run_compiled_code); + } + + ldr(rscratch1, Address(method, Method::from_interpreted_offset())); + br(rscratch1); +} + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. amd64 does not do this. 
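The expression-stack helpers above map directly onto AArch64 pre- and post-indexed addressing: a push is a pre-decrement store (str r, [esp, #-8]!) and a pop is a post-increment load (ldr r, [esp], #8), with longs and doubles moving esp by two stack slots but occupying only the slot esp ends up pointing at. In pointer terms, roughly (a toy model, not HotSpot code):

#include <assert.h>
#include <stdint.h>

typedef uint64_t word_t;

static void push_word(word_t** esp, word_t v) { *--(*esp) = v; }    // str(r, pre(esp, -wordSize))
static word_t pop_word(word_t** esp)          { return *(*esp)++; } // ldr(r, post(esp, wordSize))

int main() {
  word_t stack[8];
  word_t* esp = stack + 8;        // the expression stack grows towards lower addresses
  push_word(&esp, 42);
  push_word(&esp, 7);
  assert(pop_word(&esp) == 7);    // LIFO order
  assert(pop_word(&esp) == 42);
  assert(esp == stack + 8);
  return 0;
}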
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Unimplemented(); + } + if (verifyoop) { + verify_oop(r0, state); + } + if (table == Interpreter::dispatch_table(state)) { + addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state)); + ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3))); + } else { + mov(rscratch2, (address)table); + ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3))); + } + br(rscratch2); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode + ldrb(rscratch1, Address(pre(rbcp, step))); + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + ldrb(rscratch1, Address(rbcp, 0)); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation( + TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers r3 xmm0 may be in use for the + // result check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into r3 + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + ldrb(r3, do_not_unlock_if_synchronized); + strb(zr, do_not_unlock_if_synchronized); // reset the flag + + // get method access flags + ldr(r1, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + ldr(r2, Address(r1, Method::access_flags_offset())); + tst(r2, JVM_ACC_SYNCHRONIZED); + br(Assembler::EQ, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + cbnz(r3, no_unlock); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. 
+ const Address monitor(rfp, frame::interpreter_frame_initial_sp_offset * + wordSize - (int) sizeof(BasicObjectLock)); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + lea(c_rarg1, monitor); // address of first monitor + + ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + cbnz(r0, unlock); + + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + b(unlocked); + } + + bind(unlock); + unlock_object(c_rarg1); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // r0: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + + bind(restart); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + ldr(c_rarg1, monitor_block_top); // points to current entry, starting + // with top-most entry + lea(r19, monitor_block_bot); // points to word before bottom of + // monitor block + b(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception. + // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. + + push(state); + unlock_object(c_rarg1); + pop(state); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + + b(restart); + } + + bind(loop); + // check if current entry is used + ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + cbnz(rscratch1, exception); + + add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry + bind(entry); + cmp(c_rarg1, r19); // check if bottom reached + br(Assembler::NE, loop); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + // get sender esp + ldr(esp, + Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be + // adjusting SP to allow some space for ESP. 
If we're returning to + // compiled code the saved sender SP was saved in sender_sp, so this + // restores it. + andr(sp, esp, -16); +} + +#endif // C_INTERP + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = r0; + const Register tmp = c_rarg2; + const Register obj_reg = c_rarg3; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg %c_rarg3 + ldr(obj_reg, Address(lock_reg, obj_offset)); + + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } + + // Load (object->mark() | 1) into swap_reg + ldr(rscratch1, Address(obj_reg, 0)); + orr(swap_reg, rscratch1, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + str(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + Label fail; + if (PrintBiasedLockingStatistics) { + Label fast; + cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); + bind(fast); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + b(done); + bind(fail); + } else { + cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + // NOTE2: aarch64 does not like to subtract sp from rn so take a + // copy + mov(rscratch1, sp); + sub(swap_reg, swap_reg, rscratch1); + ands(swap_reg, swap_reg, (unsigned long)(7 - os::vm_page_size())); + + // Save the test result, for recursive case, the result is zero + str(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + br(Assembler::NE, slow_case); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + } + br(Assembler::EQ, done); + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::unlock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); + + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + } else { + Label done; + + const Register swap_reg = r0; + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %r0 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + + // Load oop into obj_reg(%c_rarg3) + ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + // Free entry + str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ldr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + cbz(header_reg, done); + + // Atomic swap back the old header + cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + + // Call the runtime routine for slow case. + str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(mdp, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); + cbz(mdp, zero_continue); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + stp(r0, r1, Address(pre(sp, -2 * wordSize))); + + // Test MDO to avoid the call if it is NULL. + ldr(r0, Address(rmethod, in_bytes(Method::method_data_offset()))); + cbz(r0, set_mdp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rmethod, rbcp); + // r0: mdi + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset()))); + lea(r1, Address(r1, in_bytes(MethodData::data_offset()))); + add(r0, r1, r0); + str(r0, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); + bind(set_mdp); + ldp(r0, r1, Address(post(sp, 2 * wordSize))); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + stp(r0, r1, Address(pre(sp, -2 * wordSize))); + stp(r2, r3, Address(pre(sp, -2 * wordSize))); + test_method_data_pointer(r3, verify_continue); // If mdp is zero, continue + get_method(r1); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ ldrsh(r2, Address(r3, in_bytes(DataLayout::bci_offset()))); + ldr(rscratch1, Address(r1, Method::const_offset())); + add(r2, r2, rscratch1, Assembler::LSL); + lea(r2, Address(r2, ConstMethod::codes_offset())); + cmp(r2, rbcp); + br(Assembler::EQ, verify_continue); + // r1: method + // rbcp: bcp // rbcp == 22 + // r3: mdp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), + r1, rbcp, r3); + bind(verify_continue); + ldp(r2, r3, Address(post(sp, 2 * wordSize))); + ldp(r0, r1, Address(post(sp, 2 * wordSize))); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + str(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + + assert_different_registers(rscratch2, rscratch1, mdp_in, reg); + + Address addr1(mdp_in, constant); + Address addr2(rscratch2, reg, Address::lsl(0)); + Address &addr = addr1; + if (reg != noreg) { + lea(rscratch2, addr1); + addr = addr2; + } + + if (decrement) { + // Decrement the register. Set condition codes. + // Intel does this + // addptr(data, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + // Label L; + // jcc(Assembler::negative, L); + // addptr(data, (int32_t) DataLayout::counter_increment); + // so we do this + ldr(rscratch1, addr); + subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment); + Label L; + br(Assembler::LO, L); // skip store if counter overflow + str(rscratch1, addr); + bind(L); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + // Intel does this + // Increment the register. Set carry flag. + // addptr(data, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. 
+ // sbbptr(data, (int32_t)0); + // so we do this + ldr(rscratch1, addr); + adds(rscratch1, rscratch1, DataLayout::counter_increment); + Label L; + br(Assembler::CS, L); // skip store if counter overflow + str(rscratch1, addr); + bind(L); + } +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + ldr(rscratch1, Address(mdp_in, header_offset)); + orr(rscratch1, rscratch1, header_bits); + str(rscratch1, Address(mdp_in, header_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ldr(rscratch1, Address(mdp_in, offset)); + cmp(value, rscratch1); + } else { + // Put the test value into a register, so caller can use it: + ldr(test_value_out, Address(mdp_in, offset)); + cmp(value, test_value_out); + } + br(Assembler::NE, not_equal_continue); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(rscratch1, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, rscratch1, LSL); + str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + lea(rscratch1, Address(mdp_in, offset_of_disp)); + ldr(rscratch1, Address(rscratch1, reg, Address::lsl(0))); + add(mdp_in, mdp_in, rscratch1, LSL); + str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(mdp_in, mdp_in, (unsigned)constant); + str(mdp_in, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // save/restore across call_VM + stp(zr, return_bci, Address(pre(sp, -2 * wordSize))); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + ldp(zr, return_bci, Address(post(sp, 2 * wordSize))); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. 
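Both the helper above and the inlined copy just below use the same saturating idiom: add or subtract DataLayout::counter_increment and skip the store when the flags report a carry or borrow, so a hot counter sticks at its extreme value instead of wrapping. A toy recomputation of that behaviour (not HotSpot code, assuming counter_increment == 1):

#include <assert.h>
#include <stdint.h>

// mirrors adds / br(CS) / str and subs / br(LO) / str with an increment of 1
static uint64_t bump_saturating(uint64_t counter, bool decrement) {
  if (decrement) {
    uint64_t next = counter - 1;
    return next > counter ? counter : next;   // borrow: keep the old value
  }
  uint64_t next = counter + 1;
  return next < counter ? counter : next;     // carry: keep the old value
}

int main() {
  assert(bump_saturating(0, true) == 0);                      // no wrap to 2^64 - 1
  assert(bump_saturating(UINT64_MAX, false) == UINT64_MAX);   // no wrap to 0
  assert(bump_saturating(41, false) == 42);
  return 0;
}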
+ // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + Address data(mdp, in_bytes(JumpData::taken_offset())); + ldr(bumped_count, data); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + // Intel does this to catch overflow + // addptr(bumped_count, DataLayout::counter_increment); + // sbbptr(bumped_count, 0); + // so we do this + adds(bumped_count, bumped_count, DataLayout::counter_increment); + Label L; + br(Assembler::CS, L); // skip store if counter overflow + str(bumped_count, data); + bind(L); + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). 
+// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + b(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + cbz(reg2, found_null); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(done); + bind(found_null); + } else { + cbnz(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + cbz(reg2, found_null); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + mov(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + b(done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + movw(reg2, in_bytes(MultiBranchData::per_case_size())); + movw(rscratch1, in_bytes(MultiBranchData::case_array_offset())); + Assembler::maddw(index, index, reg2, rscratch1); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. 
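In outline, the entry notification below is a guarded upcall; the DTrace and RedefineClasses probes that follow use the same guarded-upcall shape with their own flags. As a sketch in C-like terms (accessor names are illustrative):

//   if (JvmtiExport::can_post_interpreter_events()) {       // decided while generating code
//     if (thread->interp_only_mode() != 0)                  // ldrw + cbzw at run time
//       InterpreterRuntime::post_method_entry(thread);      // call_VM
//   }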
+ if (JvmtiExport::can_post_interpreter_events()) { + Label L; + ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset())); + cbzw(r3, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + } + + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + get_method(c_rarg1); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, c_rarg1); + } + + } + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // For c++ interpreter the result is always stored at a known location in the frame + // template interpreter will leave it on the top of the stack. + NOT_CC_INTERP(push(state);) + ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset())); + cbz(r3, L); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(L); + NOT_CC_INTERP(pop(state)); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + NOT_CC_INTERP(push(state)); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, c_rarg1); + NOT_CC_INTERP(pop(state)); + } +} + + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, Register scratch2, + bool preloaded, + Condition cond, Label* where) { + if (!preloaded) { + ldrw(scratch, counter_addr); + } + add(scratch, scratch, increment); + strw(scratch, counter_addr); + if (operand_valid_for_logical_immediate(/*is32*/true, mask)) { + andsw(scratch, scratch, mask); + } else { + movw(scratch2, (unsigned)mask); + andsw(scratch, scratch, scratch2); + } + br(cond, *where); +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore rbcp & rlocals pointer since these + // are callee saved registers and no blocking/ GC can happen + // in leaf calls. 
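The increment_mask_and_jump helper a few lines above implements its one-line contract literally: bump the counter in memory, mask it, and branch on the chosen condition; it is the building block the interpreter's invocation and backedge counter checks are assembled from. A toy recomputation with EQ as the condition (illustrative increment and mask values, not the real InvocationCounter layout):

#include <assert.h>
#include <stdint.h>

// returns true when ((*counter += increment) & mask) == 0, i.e. the br(EQ) case
static bool bump_and_test_eq(uint32_t* counter, uint32_t increment, uint32_t mask) {
  *counter += increment;
  return (*counter & mask) == 0;
}

int main() {
  uint32_t c = 0;
  int taken = 0;
  for (int i = 0; i < 64; i++)
    taken += bump_and_test_eq(&c, 8, 0xf8) ? 1 : 0;  // fires every 32nd call here
  assert(taken == 2);                                 // at c == 256 and c == 512
  return 0;
}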
+#ifdef ASSERT + { + Label L; + ldr(rscratch1, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + cbz(rscratch1, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + // assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ldr(rscratch1, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + cbz(rscratch1, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); +// interpreter specific + restore_bcp(); + restore_locals(); +} + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + cbnz(obj, update); + orptr(mdo_addr, TypeEntries::null_seen); + b(next); + + bind(update); + load_klass(obj, obj); + + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + tst(obj, TypeEntries::type_klass_mask); + br(Assembler::EQ, next); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + tst(obj, TypeEntries::type_unknown); + br(Assembler::NE, next); // already unknown. Nothing to do anymore. + + ldr(rscratch1, mdo_addr); + cbz(rscratch1, none); + cmp(rscratch1, TypeEntries::null_seen); + br(Assembler::EQ, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + tst(obj, TypeEntries::type_klass_mask); + br(Assembler::EQ, next); + + // different than before. Cannot keep accurate profile. + orptr(mdo_addr, TypeEntries::type_unknown); + b(next); + + bind(none); + // first time here. Set profile type. + str(obj, mdo_addr); + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ldrb(rscratch1, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + cmp(rscratch1, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + br(Assembler::NE, profile_continue); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count()); + cmp(tmp, TypeStackSlotEntries::per_arg_count()); + add(rscratch1, mdp, off_to_args); + br(Assembler::LT, done); + } + ldr(tmp, Address(callee, Method::const_offset())); + load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i)))); + sub(tmp, tmp, rscratch1); + sub(tmp, tmp, 1); + Address arg_addr = argument_address(tmp); + ldr(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + add(rscratch1, mdp, off_to_args); + bind(done); + mov(mdp, rscratch1); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + add(mdp, mdp, tmp, LSL, exact_log2(DataLayout::cell_size)); + } + str(mdp, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, rbcp); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + ldrb(rscratch1, Address(rbcp, 0)); + cmp(rscratch1, Bytecodes::_invokedynamic); + br(Assembler::EQ, do_profile); + cmp(rscratch1, Bytecodes::_invokehandle); + br(Assembler::EQ, do_profile); + get_method(tmp); + ldrb(rscratch1, Address(tmp, Method::intrinsic_id_offset_in_bytes())); + cmp(rscratch1, vmIntrinsics::_compiledLambdaForm); + br(Assembler::NE, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + mov(tmp, ret); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ldr(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + cmp(tmp1, 0u); + br(Assembler::LT, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + add(mdp, mdp, tmp1); + ldr(tmp1, Address(mdp, ArrayData::array_len_offset())); + sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + int per_arg_scale = exact_log2(DataLayout::cell_size); + add(rscratch1, mdp, off_base); + add(rscratch2, mdp, type_base); + + Address arg_off(rscratch1, tmp1, Address::lsl(per_arg_scale)); + Address arg_type(rscratch2, tmp1, Address::lsl(per_arg_scale)); + + // load offset on the stack from the slot for this parameter + ldr(tmp2, arg_off); + neg(tmp2, tmp2); + // read the parameter from the local area + ldr(tmp2, Address(rlocals, tmp2, Address::lsl(Interpreter::logStackElementSize))); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + subs(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + br(Assembler::GE, loop); + + bind(profile_continue); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interp_masm_aarch64.hpp 2021-01-25 19:31:44.793561156 +0000 @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP +#define CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP + +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + protected: + + protected: + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true); +#endif // CC_INTERP + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + str(rbcp, Address(rfp, frame::interpreter_frame_bcx_offset * wordSize)); + } + + void restore_bcp() { + ldr(rbcp, Address(rfp, frame::interpreter_frame_bcx_offset * wordSize)); + } + + void restore_locals() { + ldr(rlocals, Address(rfp, frame::interpreter_frame_locals_offset * wordSize)); + } + + void restore_constant_pool_cache() { + ldr(rcpool, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); + } + + void get_dispatch(); + + // Helpers for runtime call arguments/results + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ldr(reg, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + } + + void get_const(Register reg) { + get_method(reg); + ldr(reg, Address(reg, in_bytes(Method::const_offset()))); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ldr(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ldr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ldr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int 
bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index); + + void pop_ptr(Register r = r0); + void pop_i(Register r = r0); + void pop_l(Register r = r0); + void pop_f(FloatRegister r = v0); + void pop_d(FloatRegister r = v0); + void push_ptr(Register r = r0); + void push_i(Register r = r0); + void push_l(Register r = r0); + void push_f(FloatRegister r = v0); + void push_d(FloatRegister r = v0); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void pop(RegSet regs, Register stack) { ((MacroAssembler*)this)->pop(regs, stack); } + void push(RegSet regs, Register stack) { ((MacroAssembler*)this)->push(regs, stack); } + + void empty_expression_stack() { + ldr(esp, Address(rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + // NULL last_sp until next java call + str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + // dispatch via rscratch1 + void dispatch_only(TosState state); + // dispatch normal table via rscratch1 (assume rscratch1 is loaded already) + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + // load rscratch1 from [rbcp + step] and dispatch via rscratch1 + void dispatch_next(TosState state, int step = 0); + // load rscratch1 from [esi] and dispatch via rscratch1 and table + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // FIXME: Give us a valid frame at a null check. 
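+ // Editor's note (illustrative, not part of the original patch): the intent behind
+ // this FIXME is sketched by the disabled ASSERT block inside null_check() below.
+ // Bracketing the check with save_bcp() / set_last_Java_frame(esp, rfp, pc()) and
+ // reset_last_Java_frame(true) would give an implicit-null-check fault a walkable
+ // last Java frame to report against; the code is kept commented out here.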
+ virtual void null_check(Register reg, int offset = -1) { +// #ifdef ASSERT +// save_bcp(); +// set_last_Java_frame(esp, rfp, (address) pc()); +// #endif + MacroAssembler::null_check(reg, offset); +// #ifdef ASSERT +// reset_last_Java_frame(true); +// #endif + } + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, Register scratch2, + bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + // narrow int return value + void narrow(Register result); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); + + virtual void _call_Unimplemented(address call_site) { + save_bcp(); + set_last_Java_frame(esp, rfp, (address) pc(), rscratch1); + MacroAssembler::_call_Unimplemented(call_site); + } +}; + +#endif // CPU_AARCH64_VM_INTERP_MASM_AARCH64_64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 
+++ new/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp 2021-01-25 19:31:45.221565654 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP +#define CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP + + +// Generation of Interpreter +// + friend class AbstractInterpreterGenerator; + +protected: + + void bang_stack_shadow_pages(bool native_call); + +private: + + address generate_normal_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); +void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); + address generate_empty_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(); + address generate_CRC32_update_entry(); + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); + +#endif // CPU_AARCH64_VM_INTERPRETERGENERATOR_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interpreterRT_aarch64.cpp 2021-01-25 19:31:45.636570016 +0000 @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +// Implementation of SignatureHandlerGenerator +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rlocals; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rscratch1; } + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + switch (_num_int_args) { + case 0: + __ ldr(c_rarg1, src); + _num_int_args++; + break; + case 1: + __ ldr(c_rarg2, src); + _num_int_args++; + break; + case 2: + __ ldr(c_rarg3, src); + _num_int_args++; + break; + case 3: + __ ldr(c_rarg4, src); + _num_int_args++; + break; + case 4: + __ ldr(c_rarg5, src); + _num_int_args++; + break; + case 5: + __ ldr(c_rarg6, src); + _num_int_args++; + break; + case 6: + __ ldr(c_rarg7, src); + _num_int_args++; + break; + default: + __ ldr(r0, src); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_int_args++; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + switch (_num_int_args) { + case 0: + __ ldr(c_rarg1, src); + _num_int_args++; + break; + case 1: + __ ldr(c_rarg2, src); + _num_int_args++; + break; + case 2: + __ ldr(c_rarg3, src); + _num_int_args++; + break; + case 3: + __ ldr(c_rarg4, src); + _num_int_args++; + break; + case 4: + __ ldr(c_rarg5, src); + _num_int_args++; + break; + case 5: + __ ldr(c_rarg6, src); + _num_int_args++; + break; + case 6: + __ ldr(c_rarg7, src); + _num_int_args++; + break; + default: + __ ldr(r0, src); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_int_args++; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + if (_num_fp_args < Argument::n_float_register_parameters_c) { + __ ldrs(as_FloatRegister(_num_fp_args++), src); + } else { + __ ldrw(r0, src); + __ strw(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_fp_args++; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + if (_num_fp_args < Argument::n_float_register_parameters_c) { + __ ldrd(as_FloatRegister(_num_fp_args++), src); + } else { + __ ldr(r0, src); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_fp_args++; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + + switch (_num_int_args) { + case 0: + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ add(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + _num_int_args++; + break; + case 1: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + 
__ mov(c_rarg2, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg2, r0); + __ bind(L); + _num_int_args++; + break; + } + case 2: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg3, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg3, r0); + __ bind(L); + _num_int_args++; + break; + } + case 3: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg4, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg4, r0); + __ bind(L); + _num_int_args++; + break; + } + case 4: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg5, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg5, r0); + __ bind(L); + _num_int_args++; + break; + } + case 5: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg6, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg6, r0); + __ bind(L); + _num_int_args++; + break; + } + case 6: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg7, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg7, r0); + __ bind(L); + _num_int_args++; + break; + } + default: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ ldr(temp(), r0); + Label L; + __ cbnz(temp(), L); + __ mov(r0, zr); + __ bind(L); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_int_args++; + break; + } + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + + // return result handler + __ lea(r0, ExternalAddress(Interpreter::result_handler(method()->result_type()))); + __ ret(lr); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_int_args; + unsigned int _num_fp_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_int_register_parameters_c-1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + _num_int_args++; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_int_register_parameters_c-1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + _num_int_args++; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_int_register_parameters_c-1) { + *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t)from_addr; + _num_int_args++; + } else { + *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; + _num_int_args++; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters_c) { + *_fp_args++ = from_obj; + _num_fp_args++; + } else { + *_to++ = from_obj; + _num_fp_args++; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters_c) { + *_fp_args++ = from_obj; + *_fp_identifiers |= (1 << _num_fp_args); // mark as double + _num_fp_args++; + } else { + *_to++ = from_obj; + _num_fp_args++; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_int_args = (method->is_static() ? 1 : 0); + _num_fp_args = 0; + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); + ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interpreterRT_aarch64.hpp 2021-01-25 19:31:46.102574914 +0000 @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP +#define CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_fp_args; + unsigned int _num_int_args; + int _stack_offset; + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_double(); + void pass_object(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_int_args = (method->is_static() ? 
1 : 0); + _num_fp_args = 0; + _stack_offset = 0; + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_AARCH64_VM_INTERPRETERRT_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interpreter_aarch64.cpp 2021-01-25 19:31:46.578579917 +0000 @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +#define __ _masm-> + + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + __ andr(esp, esp, -16); + __ mov(c_rarg3, esp); + // rmethod + // rlocals + // c_rarg3: first stack arg - wordSize + + // adjust sp + __ sub(sp, c_rarg3, 18 * wordSize); + __ str(lr, Address(__ pre(sp, -2 * wordSize))); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + rmethod, rlocals, c_rarg3); + + // r0: result handler + + // Stack layout: + // rsp: return address <- sp + // 1 garbage + // 8 integer args (if static first is unused) + // 1 float/double identifiers + // 8 double args + // stack args <- esp + // garbage + // expression stack bottom + // bcp (NULL) + // ... 
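+ // Illustrative note (not in the original patch): the 18-word scratch area
+ // reserved by the 'sub(sp, c_rarg3, 18 * wordSize)' above breaks down as
+ // 1 unused word + 8 integer-register args + 1 word of float/double
+ // identifiers + 8 FP-register args; the loads below pick the identifiers up
+ // at sp + 9*wordSize and the FP args at sp + (10 + i)*wordSize.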
+ + // Restore LR + __ ldr(lr, Address(__ post(sp, 2 * wordSize))); + + // Do FP first so we can use c_rarg3 as temp + __ ldrw(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers + + for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { + const FloatRegister r = as_FloatRegister(i); + + Label d, done; + + __ tbnz(c_rarg3, i, d); + __ ldrs(r, Address(sp, (10 + i) * wordSize)); + __ b(done); + __ bind(d); + __ ldrd(r, Address(sp, (10 + i) * wordSize)); + __ bind(done); + } + + // c_rarg0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. + __ ldr(c_rarg1, Address(sp, 1 * wordSize)); + for (int i = c_rarg2->encoding(); i <= c_rarg7->encoding(); i += 2) { + Register rm = as_Register(i), rn = as_Register(i+1); + __ ldp(rm, rn, Address(sp, i * wordSize)); + } + + __ add(sp, sp, 18 * wordSize); + __ ret(lr); + + return entry; +} + + +// +// Various method entries +// + +address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + // rmethod: Method* + // r13: sender sp + // esp: args + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: + // [ arg ] <-- esp + // [ arg ] + // retaddr in lr + + address entry_point = NULL; + Register continuation = lr; + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); + __ ldrd(v0, Address(esp)); + __ fabsd(v0, v0); + __ mov(sp, r13); // Restore caller's SP + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); + __ ldrd(v0, Address(esp)); + __ fsqrtd(v0, v0); + __ mov(sp, r13); + break; + case Interpreter::java_lang_math_sin : + case Interpreter::java_lang_math_cos : + case Interpreter::java_lang_math_tan : + case Interpreter::java_lang_math_log : + case Interpreter::java_lang_math_log10 : + case Interpreter::java_lang_math_exp : + entry_point = __ pc(); + __ ldrd(v0, Address(esp)); + __ mov(sp, r13); + __ mov(r19, lr); + continuation = r19; // The first callee-saved register + generate_transcendental_entry(kind, 1); + break; + case Interpreter::java_lang_math_pow : + entry_point = __ pc(); + __ mov(r19, lr); + continuation = r19; + __ ldrd(v0, Address(esp, 2 * Interpreter::stackElementSize)); + __ ldrd(v1, Address(esp)); + __ mov(sp, r13); + generate_transcendental_entry(kind, 2); + break; + default: + ; + } + if (entry_point) { + __ br(continuation); + } + + return entry_point; +} + + // double trigonometrics and transcendentals + // static jdouble dsin(jdouble x); + // static jdouble dcos(jdouble x); + // static jdouble dtan(jdouble x); + // static jdouble dlog(jdouble x); + // static jdouble dlog10(jdouble x); + // static jdouble dexp(jdouble x); + // static jdouble dpow(jdouble x, jdouble y); + +void InterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { + address fn; + switch (kind) { + case Interpreter::java_lang_math_sin : + fn = 
CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case Interpreter::java_lang_math_cos : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case Interpreter::java_lang_math_tan : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case Interpreter::java_lang_math_log : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case Interpreter::java_lang_math_log10 : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case Interpreter::java_lang_math_exp : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + case Interpreter::java_lang_math_pow : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + break; + default: + ShouldNotReachHere(); + } + __ mov(rscratch1, fn); + __ blr(rscratch1); +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address InterpreterGenerator::generate_abstract_entry(void) { + // rmethod: Method* + // r13: sender SP + + address entry_point = __ pc(); + + // abstract method entry + + // pop return address, reset last_sp to NULL + __ empty_expression_stack(); + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +// Empty method, generate a very fast return. + +address InterpreterGenerator::generate_empty_entry(void) { + // rmethod: Method* + // r13: sender sp must set sp to this value on return + + if (!UseFastEmptyMethods) { + return NULL; + } + + address entry_point = __ pc(); + + // If we need a safepoint check, generate full interpreter entry. + Label slow_path; + { + unsigned long offset; + assert(SafepointSynchronize::_not_synchronized == 0, + "SafepointSynchronize::_not_synchronized"); + __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset); + __ ldrw(rscratch2, Address(rscratch2, offset)); + __ cbnz(rscratch2, slow_path); + } + + // do nothing for empty methods (do not even increment invocation counter) + // Code: _return + // _return + // return w/o popping parameters + __ mov(sp, r13); // Restore caller's SP + __ br(lr); + + __ bind(slow_path); + (void) generate_normal_entry(false); + return entry_point; + +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/interpreter_aarch64.hpp 2021-01-25 19:31:47.002584374 +0000 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. + * All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP +#define CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP + + public: + + // Offset from rsp (which points to the last stack element) + static int expr_offset_in_bytes(int i) { return stackElementSize * i; } + + // Stack index relative to tos (which points at value) + static int expr_index_at(int i) { return stackElementWords * i; } + + // Already negated by c++ interpreter + static int local_index_at(int i) { + assert(i <= 0, "local direction already negated"); + return stackElementWords * i; + } + +#endif // CPU_AARCH64_VM_INTERPRETER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp 2021-01-25 19:31:47.442588998 +0000 @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP +#define CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + OrderAccess::release(); + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // n.b. the writes to fp and pc do not require any preceding + // release(). when copying into the thread anchor, which only + // happens under ~JavaCallWrapper(), sp will have been NULLed by a + // call to zap() and the NULL write will have been published by a + // fence in the state transition to in_vm. contrariwise, when + // copying into the wrapper anchor, which only happens under + // JavaCallWrapper(), there is no ordering requirement at all + // since that object is thread local until the subsequent entry + // into java. JavaCallWrapper() call clear() after copy() thus + // ensuring that all 3 writes are visible() before the wrapper is + // accessible to other threads. + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if + // has_last_frame() is true + OrderAccess::release(); + _last_Java_sp = src->_last_Java_sp; + } + + bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } + void make_walkable(JavaThread* thread); + void capture_last_Java_pc(void); + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + // n.b. set_last_Java_sp and set_last_Java_fp are never called + // (which is good because they would need a preceding or following + // call to OrderAccess::release() to make sure the writes are + // visible in the correct order). +void set_last_Java_sp(intptr_t* sp) { assert(false, "should not be called"); _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { assert(false, "should not be called"); _last_Java_fp = fp; } + +#endif // CPU_AARCH64_VM_JAVAFRAMEANCHOR_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp 2021-01-25 19:31:47.913593949 +0000 @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing a LoadLoad barrier we create an address +// dependency between loads; this might be more efficient. + +// Common register usage: +// r0/v0: result +// c_rarg0: jni env +// c_rarg1: obj +// c_rarg2: jfield id + +static const Register robj = r3; +static const Register rcounter = r4; +static const Register roffset = r5; +static const Register rcounter_addr = r6; +static const Register result = r7; + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + unsigned long offset; + __ adrp(rcounter_addr, + SafepointSynchronize::safepoint_counter_addr(), offset); + Address safepoint_counter_addr(rcounter_addr, offset); + __ ldrw(rcounter, safepoint_counter_addr); + __ andw(rscratch1, rcounter, 1); + __ cbnzw(rscratch1, slow); + __ eor(robj, c_rarg1, rcounter); + __ eor(robj, robj, rcounter); // obj, since + // robj ^ rcounter ^ rcounter == robj + // robj is address dependent on rcounter. + + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andr(robj, robj, ~JNIHandles::weak_tag_mask); + + __ ldr(robj, Address(robj, 0)); // *obj + __ lsr(roffset, c_rarg2, 2); // offset + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + switch (type) { + case T_BOOLEAN: __ ldrb (result, Address(robj, roffset)); break; + case T_BYTE: __ ldrsb (result, Address(robj, roffset)); break; + case T_CHAR: __ ldrh (result, Address(robj, roffset)); break; + case T_SHORT: __ ldrsh (result, Address(robj, roffset)); break; + case T_FLOAT: __ ldrw (result, Address(robj, roffset)); break; + case T_INT: __ ldrsw (result, Address(robj, roffset)); break; + case T_DOUBLE: + case T_LONG: __ ldr (result, Address(robj, roffset)); break; + default: ShouldNotReachHere(); + } + + // counter_addr is address dependent on result. 
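+ // Illustrative note (not in the original patch): the pair of eor's below is a
+ // no-op on the value (rcounter_addr ^ result ^ result == rcounter_addr), but it
+ // makes the re-load of the safepoint counter data-dependent on the field load,
+ // so the hardware cannot reorder the two loads. This is the same trick used for
+ // robj above and stands in for an explicit LoadLoad barrier.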
+ __ eor(rcounter_addr, rcounter_addr, result); + __ eor(rcounter_addr, rcounter_addr, result); + __ ldrw(rscratch1, safepoint_counter_addr); + __ cmpw(rcounter, rscratch1); + __ br (Assembler::NE, slow); + + switch (type) { + case T_FLOAT: __ fmovs(v0, result); break; + case T_DOUBLE: __ fmovd(v0, result); break; + default: __ mov(r0, result); break; + } + __ ret(lr); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind(slow); + address slow_case_addr; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + + { + __ enter(); + __ lea(rscratch1, ExternalAddress(slow_case_addr)); + __ blr(rscratch1); + __ maybe_isb(); + __ leave(); + __ ret(lr); + } + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/jniTypes_aarch64.hpp 2021-01-25 19:31:48.355598594 +0000 @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH64_VM_JNITYPES_AARCH64_HPP +#define CPU_AARCH64_VM_JNITYPES_AARCH64_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 1 + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. 
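+ // (Editor's note, illustrative only: the same holds on AArch64, since every
+ // JavaCallArguments slot is a 64-bit intptr_t.) Worked example: pushing
+ // (jint i, jlong l, oop o) with the 'pos' variants stores i in slot 0, l in
+ // slots 1-2 with the value written at slot 2 (to + 1 + pos), and o in slot 3;
+ // get_long()/get_double() read the value back through the same
+ // _JNI_SLOT_OFFSET of 1.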
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_AARCH64_VM_JNITYPES_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/jni_aarch64.h 2021-01-25 19:31:48.985605216 +0000 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +#if defined(SOLARIS) || defined(LINUX) || defined(_ALLBSD_SOURCE) + + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + + #define JNICALL + typedef int jint; +#if defined(_LP64) + typedef long jlong; +#else + typedef long long jlong; +#endif + +#else + #define JNIEXPORT __declspec(dllexport) + #define JNIIMPORT __declspec(dllimport) + #define JNICALL __stdcall + + typedef int jint; + typedef __int64 jlong; +#endif + +typedef signed char jbyte; + +#endif /* !_JAVASOFT_JNI_MD_H_ */ --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp 2021-01-25 19:31:49.512610755 +0000 @@ -0,0 +1,4883 @@ +/* +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "interpreter/interpreter.hpp" + +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" + +// #include "gc_interface/collectedHeap.inline.hpp" +// #include "interpreter/interpreter.hpp" +// #include "memory/cardTableModRefBS.hpp" +// #include "prims/methodHandles.hpp" +// #include "runtime/biasedLocking.hpp" +// #include "runtime/interfaceSupport.hpp" +// #include "runtime/objectMonitor.hpp" +// #include "runtime/os.hpp" +// #include "runtime/sharedRuntime.hpp" +// #include "runtime/stubRoutines.hpp" + +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif + +#ifdef COMPILER2 +#include "opto/node.hpp" +#include "opto/compile.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Patch any kind of instruction; there may be several instructions. +// Return the total length (in bytes) of the instructions. +int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + int instructions = 1; + assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant"); + long offset = (target - branch) >> 2; + unsigned insn = *(unsigned*)branch; + if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) { + // Load register (literal) + Instruction_aarch64::spatch(branch, 23, 5, offset); + } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) { + // Unconditional branch (immediate) + Instruction_aarch64::spatch(branch, 25, 0, offset); + } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) { + // Conditional branch (immediate) + Instruction_aarch64::spatch(branch, 23, 5, offset); + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) { + // Compare & branch (immediate) + Instruction_aarch64::spatch(branch, 23, 5, offset); + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) { + // Test & branch (immediate) + Instruction_aarch64::spatch(branch, 18, 5, offset); + } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) { + // PC-rel. 
addressing + offset = target-branch; + int shift = Instruction_aarch64::extract(insn, 31, 31); + if (shift) { + u_int64_t dest = (u_int64_t)target; + uint64_t pc_page = (uint64_t)branch >> 12; + uint64_t adr_page = (uint64_t)target >> 12; + unsigned offset_lo = dest & 0xfff; + offset = adr_page - pc_page; + + // We handle 4 types of PC relative addressing + // 1 - adrp Rx, target_page + // ldr/str Ry, [Rx, #offset_in_page] + // 2 - adrp Rx, target_page + // add Ry, Rx, #offset_in_page + // 3 - adrp Rx, target_page (page aligned reloc, offset == 0) + // movk Rx, #imm16<<32 + // 4 - adrp Rx, target_page (page aligned reloc, offset == 0) + // In the first 3 cases we must check that Rx is the same in the adrp and the + // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end + // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened + // to be followed by a random unrelated ldr/str, add or movk instruction. + // + unsigned insn2 = ((unsigned*)branch)[1]; + if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 9, 5)) { + // Load/store register (unsigned immediate) + unsigned size = Instruction_aarch64::extract(insn2, 31, 30); + Instruction_aarch64::patch(branch + sizeof (unsigned), + 21, 10, offset_lo >> size); + guarantee(((dest >> size) << size) == dest, "misaligned target"); + instructions = 2; + } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + // add (immediate) + Instruction_aarch64::patch(branch + sizeof (unsigned), + 21, 10, offset_lo); + instructions = 2; + } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + // movk #imm16<<32 + Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32); + long dest = ((long)target & 0xffffffffL) | ((long)branch & 0xffff00000000L); + long pc_page = (long)branch >> 12; + long adr_page = (long)dest >> 12; + offset = adr_page - pc_page; + instructions = 2; + } + } + int offset_lo = offset & 3; + offset >>= 2; + Instruction_aarch64::spatch(branch, 23, 5, offset); + Instruction_aarch64::patch(branch, 30, 29, offset_lo); + } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) { + u_int64_t dest = (u_int64_t)target; + // Move wide constant + assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch"); + assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch"); + Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff); + Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff); + Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff); + assert(target_addr_for_insn(branch) == target, "should be"); + instructions = 3; + } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && + Instruction_aarch64::extract(insn, 4, 0) == 0b11111) { + // nothing to do + assert(target == 0, "did not expect to relocate target for polling page load"); + } else { + ShouldNotReachHere(); + } + return instructions * NativeInstruction::instruction_size; +} + +int MacroAssembler::patch_oop(address insn_addr, address o) { + int instructions; + unsigned insn = *(unsigned*)insn_addr; + assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch"); + + // OOPs are either narrow (32 bits) or wide (48 
bits). We encode + // narrow OOPs by setting the upper 16 bits in the first + // instruction. + if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) { + // Move narrow OOP + narrowOop n = oopDesc::encode_heap_oop((oop)o); + Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16); + Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff); + instructions = 2; + } else { + // Move wide OOP + assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch"); + uintptr_t dest = (uintptr_t)o; + Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff); + Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff); + Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff); + instructions = 3; + } + return instructions * NativeInstruction::instruction_size; +} + +address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { + long offset = 0; + if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) { + // Load register (literal) + offset = Instruction_aarch64::sextract(insn, 23, 5); + return address(((uint64_t)insn_addr + (offset << 2))); + } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) { + // Unconditional branch (immediate) + offset = Instruction_aarch64::sextract(insn, 25, 0); + } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) { + // Conditional branch (immediate) + offset = Instruction_aarch64::sextract(insn, 23, 5); + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) { + // Compare & branch (immediate) + offset = Instruction_aarch64::sextract(insn, 23, 5); + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) { + // Test & branch (immediate) + offset = Instruction_aarch64::sextract(insn, 18, 5); + } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) { + // PC-rel. addressing + offset = Instruction_aarch64::extract(insn, 30, 29); + offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2; + int shift = Instruction_aarch64::extract(insn, 31, 31) ? 12 : 0; + if (shift) { + offset <<= shift; + uint64_t target_page = ((uint64_t)insn_addr) + offset; + target_page &= ((uint64_t)-1) << shift; + // Return the target address for the following sequences + // 1 - adrp Rx, target_page + // ldr/str Ry, [Rx, #offset_in_page] + // 2 - adrp Rx, target_page + // add Ry, Rx, #offset_in_page + // 3 - adrp Rx, target_page (page aligned reloc, offset == 0) + // movk Rx, #imm12<<32 + // 4 - adrp Rx, target_page (page aligned reloc, offset == 0) + // + // In the first two cases we check that the register is the same and + // return the target_page + the offset within the page. + // Otherwise we assume it is a page aligned relocation and return + // the target page only. 
+ // + unsigned insn2 = ((unsigned*)insn_addr)[1]; + if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 9, 5)) { + // Load/store register (unsigned immediate) + unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10); + unsigned int size = Instruction_aarch64::extract(insn2, 31, 30); + return address(target_page + (byte_offset << size)); + } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + // add (immediate) + unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10); + return address(target_page + byte_offset); + } else { + if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + target_page = (target_page & 0xffffffff) | + ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32); + } + return (address)target_page; + } + } else { + ShouldNotReachHere(); + } + } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) { + u_int32_t *insns = (u_int32_t *)insn_addr; + // Move wide constant: movz, movk, movk. See movptr(). + assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch"); + assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch"); + return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5)) + + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16) + + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32)); + } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && + Instruction_aarch64::extract(insn, 4, 0) == 0b11111) { + return 0; + } else { + ShouldNotReachHere(); + } + return address(((uint64_t)insn_addr + (offset << 2))); +} + +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + dsb(Assembler::SY); +} + + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + str(zr, Address(rthread, JavaThread::last_Java_sp_offset())); + + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + str(zr, Address(rthread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + str(zr, Address(rthread, JavaThread::last_Java_pc_offset())); +} + +// Calls to C land +// +// When entering C land, the rfp, & resp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
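The two patching routines above both reduce to the same adrp page arithmetic: the adrp instruction encodes a signed page delta relative to the page holding the pc, and the follow-on add/ldr/str supplies the low 12 bits of the target. Before the frame-anchor helpers that follow, a minimal standalone sketch of that arithmetic in plain C++ (not the MacroAssembler API; the names AdrpParts, split_target and rebuild_target are illustrative only):

#include <cassert>
#include <cstdint>

// An adrp-based sequence carries a signed page delta in the adrp itself and
// the low 12 bits of the target in the following add/ldr/str instruction.
struct AdrpParts {
  int64_t  page_delta;      // target_page - pc_page
  uint32_t offset_in_page;  // low 12 bits of the target
};

static AdrpParts split_target(uint64_t pc, uint64_t target) {
  AdrpParts p;
  p.page_delta     = (int64_t)(target >> 12) - (int64_t)(pc >> 12);
  p.offset_in_page = (uint32_t)(target & 0xfff);
  return p;
}

static uint64_t rebuild_target(uint64_t pc, const AdrpParts& p) {
  // adrp materializes the page address relative to the page holding pc;
  // the follow-on instruction adds the byte offset within that page.
  return (((pc >> 12) + (uint64_t)p.page_delta) << 12) | p.offset_in_page;
}

int main() {
  uint64_t pc = 0x0000007f9c01234cULL, target = 0x0000007f9d05678aULL;
  AdrpParts p = split_target(pc, target);
  assert(rebuild_target(pc, p) == target);  // the split/patch round-trips
  return 0;
}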
+void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch) { + + if (last_java_pc->is_valid()) { + str(last_java_pc, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + } + + // determine last_java_sp register + if (last_java_sp == sp) { + mov(scratch, sp); + last_java_sp = scratch; + } else if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + str(last_java_sp, Address(rthread, JavaThread::last_Java_sp_offset())); + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + str(last_java_fp, Address(rthread, JavaThread::last_Java_fp_offset())); + } +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register scratch) { + if (last_java_pc != NULL) { + adr(scratch, last_java_pc); + } else { + // FIXME: This is almost never correct. We should delete all + // cases of set_last_Java_frame with last_java_pc=NULL and use the + // correct return address instead. + adr(scratch, pc()); + } + + str(scratch, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + + set_last_Java_frame(last_java_sp, last_java_fp, noreg, scratch); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &L, + Register scratch) { + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, (address)NULL, scratch); + } +} + +void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + unsigned long offset; + // We can use ADRP here because we know that the total size of + // the code cache cannot exceed 2Gb. + adrp(tmp, entry, offset); + add(tmp, tmp, offset); + if (cbuf) cbuf->set_insts_mark(); + blr(tmp); + } else { + if (cbuf) cbuf->set_insts_mark(); + bl(entry); + } +} + +void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + unsigned long offset; + // We can use ADRP here because we know that the total size of + // the code cache cannot exceed 2Gb. 
+ adrp(tmp, entry, offset); + add(tmp, tmp, offset); + if (cbuf) cbuf->set_insts_mark(); + br(tmp); + } else { + if (cbuf) cbuf->set_insts_mark(); + b(entry); + } +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(lock_reg, obj_reg, swap_reg); + + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); + + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ldr(swap_reg, mark_addr); + } + andr(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); + cmp(tmp_reg, markOopDesc::biased_lock_pattern); + br(Assembler::NE, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, tmp_reg, rthread); + eor(tmp_reg, swap_reg, tmp_reg); + andr(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); + if (counters != NULL) { + Label around; + cbnz(tmp_reg, around); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2); + b(done); + bind(around); + } else { + cbz(tmp_reg, done); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andr(rscratch1, tmp_reg, markOopDesc::biased_lock_mask_in_place); + cbnz(rscratch1, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. 
+ andr(rscratch1, tmp_reg, markOopDesc::epoch_mask_in_place); + cbnz(rscratch1, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + { + Label here; + mov(rscratch1, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, rscratch1); + orr(tmp_reg, swap_reg, rthread); + cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + bind(here); + if (counters != NULL) { + atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), + tmp_reg, rscratch1, rscratch2); + } + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label here; + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, rthread, tmp_reg); + cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case); + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + bind(here); + if (counters != NULL) { + atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), + tmp_reg, rscratch1, rscratch2); + } + } + b(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label here, nope; + load_prototype_header(tmp_reg, obj_reg); + cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, &nope); + bind(here); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. 
+ if (counters != NULL) { + atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, + rscratch1, rscratch2); + } + bind(nope); + } + + bind(cas_label); + + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andr(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + cmp(temp_reg, markOopDesc::biased_lock_pattern); + br(Assembler::EQ, done); +} + + +// added to make this compile + +REGISTER_DEFINITION(Register, noreg); + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg ) { + masm->mov(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg ) { + masm->mov(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg ) { + masm->mov(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg ) { + masm->mov(c_rarg3, arg); + } +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rthread; + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(java_thread == rthread, "unexpected register"); +#ifdef ASSERT + // TraceBytecodes does not use r12 but saves it over the call, so don't verify + // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?"); +#endif // ASSERT + + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + // push java thread (becomes first argument of C function) + + mov(c_rarg0, java_thread); + + // set last Java frame before call + assert(last_java_sp != rfp, "can't use rfp"); + + Label l; + set_last_Java_frame(last_java_sp, rfp, l, rscratch1); + + // do the call, remove parameters + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + + // reset last Java frame + // Only interpreter should have to clear fp + reset_last_Java_frame(true); + + // C++ interp handles this in the interpreter + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); + Label ok; + cbz(rscratch1, ok); + lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry())); + br(rscratch1); + bind(ok); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + 
get_vm_result(oop_result, java_thread); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + unsigned int start_offset = offset(); +#ifdef COMPILER2 + // We need a trampoline if branches are far. + if (far_branches()) { + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase. + CompileTask* task = ciEnv::current()->task(); + bool in_scratch_emit_size = + ((task != NULL) && is_c2_compile(task->comp_level()) + && Compile::current()->in_scratch_emit_size()); + if (! in_scratch_emit_size) { + address stub = emit_trampoline_stub(start_offset, entry.target()); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + } +#endif + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); +#ifdef COMPILER2 + if (!far_branches()) { + bl(entry.target()); + } else { + bl(pc()); + } +#else + bl(entry.target()); +#endif + // just need to return a non-null address + return pc(); +} + + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to or +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (LR still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { +#ifdef COMPILER2 + address stub = start_a_stub(Compile::MAX_stubs_size/2); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. + align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call + // - call + Label target; + ldr(rscratch1, target); + br(rscratch1); + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + emit_int64((int64_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + + end_a_stub(); + return stub; +#else + ShouldNotReachHere(); + return NULL; +#endif +} + +void MacroAssembler::c2bool(Register x) { + // implements x == 0 ? 0 : 1 + // note: must only look at least-significant byte of x + // since C-style booleans are stored in one byte + // only! 
(was bug) + tst(x, 0xff); + cset(x, Assembler::NE); +} + +address MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + // address const_ptr = long_constant((jlong)Universe::non_oop_word()); + // unsigned long offset; + // ldr_constant(rscratch2, const_ptr); + movptr(rscratch2, (uintptr_t)Universe::non_oop_word()); + return trampoline_call(Address(entry, rh)); +} + +// Implementation of call_VM versions + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, rthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ldr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + str(zr, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ldr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + str(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + +// these are no-ops overridden by 
InterpreterMacroAssembler
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
+
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  ldr(tmp, ExternalAddress((address) delayed_value_addr));
+
+  if (offset != 0)
+    add(tmp, tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_temp);
+  assert_different_registers(method_result, intf_klass, scan_temp);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when method isn't needed");
+
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step = itableOffsetEntry::size() * wordSize;
+  int vte_size = vtableEntry::size() * wordSize;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  ldrw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
+  add(scan_temp, scan_temp, vtable_base);
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+    lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3)));
+    if (itentry_off)
+      add(recv_klass, recv_klass, itentry_off);
+  }
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+    cmp(intf_klass, method_result);
+
+    if (peel) {
+      br(Assembler::EQ, found_method);
+    } else {
+      br(Assembler::NE, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel) break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null.
A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + cbz(method_result, L_no_such_interface); + add(scan_temp, scan_temp, scan_step); + } + + bind(found_method); + + if (return_method) { + // Got a hit. + ldrw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldr(method_result, Address(recv_klass, scan_temp, Address::uxtw(0))); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, + "adjust the scaling in the code below"); + int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + + if (vtable_index.is_register()) { + lea(method_result, Address(recv_klass, + vtable_index.as_register(), + Address::lsl(LogBytesPerWord))); + ldr(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; + ldr(method_result, + form_address(rscratch1, recv_klass, vtable_offset_in_bytes, 0)); + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + Address super_check_offset_addr(super_klass, sco_offset); + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else b(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmp(sub_klass, super_klass); + br(Assembler::EQ, *L_success); + + // Check the supertype display: + if (must_load_sco) { + // Positive movl does right thing on LP64. 
+ ldrw(temp_reg, super_check_offset_addr); + super_check_offset = RegisterOrConstant(temp_reg); + } + Address super_check_addr(sub_klass, super_check_offset); + ldr(rscratch1, super_check_addr); + cmp(super_klass, rscratch1); // load displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + br(Assembler::EQ, *L_success); + cmp(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + br(Assembler::EQ, *L_slow_path); + } else { + br(Assembler::NE, *L_failure); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + br(Assembler::EQ, *L_success); + } else { + br(Assembler::NE, *L_slow_path); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + br(Assembler::EQ, *L_success); + } else { + br(Assembler::NE, *L_failure); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); + +#undef final_jmp +} + +// These two are taken from x86, but they look generally useful + +// scans count pointer sized words at [addr] for occurence of value, +// generic +void MacroAssembler::repne_scan(Register addr, Register value, Register count, + Register scratch) { + Label Lloop, Lexit; + cbz(count, Lexit); + bind(Lloop); + ldr(scratch, post(addr, wordSize)); + cmp(value, scratch); + br(EQ, Lexit); + sub(count, count, 1); + cbnz(count, Lloop); + bind(Lexit); +} + +// scans count 4 byte words at [addr] for occurence of value, +// generic +void MacroAssembler::repne_scanw(Register addr, Register value, Register count, + Register scratch) { + Label Lloop, Lexit; + cbz(count, Lexit); + bind(Lloop); + ldrw(scratch, post(addr, wordSize)); + cmpw(value, scratch); + br(EQ, Lexit); + sub(count, count, 1); + cbnz(count, Lloop); + bind(Lexit); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + assert_different_registers(sub_klass, super_klass, temp_reg); + if (temp2_reg != noreg) + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + 
BLOCK_COMMENT("check_klass_subtype_slow_path"); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + + assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super) + assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter) + + RegSet pushed_registers; + if (!IS_A_TEMP(r2)) pushed_registers += r2; + if (!IS_A_TEMP(r5)) pushed_registers += r5; + + if (super_klass != r0 || UseCompressedOops) { + if (!IS_A_TEMP(r0)) pushed_registers += r0; + } + + push(pushed_registers, sp); + + // Get super_klass value into r0 (even if it was in r5 or r2). + if (super_klass != r0) { + mov(r0, super_klass); + } + +#ifndef PRODUCT + mov(rscratch2, (address)&SharedRuntime::_partial_subtype_ctr); + Address pst_counter_addr(rscratch2); + ldr(rscratch1, pst_counter_addr); + add(rscratch1, rscratch1, 1); + str(rscratch1, pst_counter_addr); +#endif //PRODUCT + + // We will consult the secondary-super array. + ldr(r5, secondary_supers_addr); + // Load the array length. (Positive movl does right thing on LP64.) + ldrw(r2, Address(r5, Array::length_offset_in_bytes())); + // Skip to start of data. + add(r5, r5, Array::base_offset_in_bytes()); + + cmp(sp, zr); // Clear Z flag; SP is never zero + // Scan R2 words at [R5] for an occurrence of R0. + // Set NZ/Z based on last compare. + repne_scan(r5, r0, r2, rscratch1); + + // Unspill the temp. registers: + pop(pushed_registers, sp); + + br(Assembler::NE, *L_failure); + + // Success. Cache the super we found and proceed in triumph. + str(super_klass, super_cache_addr); + + if (L_success != &L_fallthrough) { + b(*L_success); + } + +#undef IS_A_TEMP + + bind(L_fallthrough); +} + + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + + // Pass register number to verify_oop_subroutine + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop {"); + + stp(r0, rscratch1, Address(pre(sp, -2 * wordSize))); + stp(rscratch2, lr, Address(pre(sp, -2 * wordSize))); + + mov(r0, reg); + mov(rscratch1, (address)b); + + // call indirectly to solve generation ordering problem + lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + ldr(rscratch2, Address(rscratch2)); + blr(rscratch2); + + ldp(rscratch2, lr, Address(post(sp, 2 * wordSize))); + ldp(r0, rscratch1, Address(post(sp, 2 * wordSize))); + + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) return; + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop_addr {"); + + stp(r0, rscratch1, Address(pre(sp, -2 * wordSize))); + stp(rscratch2, lr, Address(pre(sp, -2 * wordSize))); + + // addr may contain sp so we will have to adjust it based on the + // pushes that we just did. 
+ if (addr.uses(sp)) { + lea(r0, addr); + ldr(r0, Address(r0, 4 * wordSize)); + } else { + ldr(r0, addr); + } + mov(rscratch1, (address)b); + + // call indirectly to solve generation ordering problem + lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + ldr(rscratch2, Address(rscratch2)); + blr(rscratch2); + + ldp(rscratch2, lr, Address(post(sp, 2 * wordSize))); + ldp(r0, rscratch1, Address(post(sp, 2 * wordSize))); + + BLOCK_COMMENT("} verify_oop_addr"); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + if (arg_slot.is_constant()) { + return Address(esp, arg_slot.as_constant() * stackElementSize + + offset); + } else { + add(rscratch1, esp, arg_slot.as_register(), + ext::uxtx, exact_log2(stackElementSize)); + return Address(rscratch1, offset); + } +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr) { + stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize))); + + // We add 1 to number_of_arguments because the thread in arg0 is + // not counted + mov(rscratch1, entry_point); + blr(rscratch1); + if (retaddr) + bind(*retaddr); + + ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize))); + maybe_isb(); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, + Register arg_1, Register arg_2) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + pass_arg2(this, arg_2); + call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + assert(arg_0 != c_rarg3, "smashed arg"); + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); 
+ pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + // NOTE: this is plenty to provoke a segv + ldr(zr, Address(reg)); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +// MacroAssembler protected routines needed to implement +// public methods + +void MacroAssembler::mov(Register r, Address dest) { + code_section()->relocate(pc(), dest.rspec()); + u_int64_t imm64 = (u_int64_t)dest.target(); + movptr(r, imm64); +} + +// Move a constant pointer into r. In AArch64 mode the virtual +// address space is 48 bits in size, so we only need three +// instructions to create a patchable instruction sequence that can +// reach anywhere. +void MacroAssembler::movptr(Register r, uintptr_t imm64) { +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64); + block_comment(buffer); + } +#endif + assert(imm64 < (1ul << 48), "48-bit overflow in address constant"); + movz(r, imm64 & 0xffff); + imm64 >>= 16; + movk(r, imm64 & 0xffff, 16); + imm64 >>= 16; + movk(r, imm64 & 0xffff, 32); +} + +// Macro to mov replicated immediate to vector register. +// Vd will get the following values for different arrangements in T +// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh +// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh +// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh +// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh +// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh +// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh +// T1D/T2D: invalid +void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { + assert(T != T1D && T != T2D, "invalid arrangement"); + if (T == T8B || T == T16B) { + assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)"); + movi(Vd, T, imm32 & 0xff, 0); + return; + } + u_int32_t nimm32 = ~imm32; + if (T == T4H || T == T8H) { + assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)"); + imm32 &= 0xffff; + nimm32 &= 0xffff; + } + u_int32_t x = imm32; + int movi_cnt = 0; + int movn_cnt = 0; + while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } + x = nimm32; + while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } + if (movn_cnt < movi_cnt) imm32 = nimm32; + unsigned lsl = 0; + while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + mvni(Vd, T, imm32 & 0xff, lsl); + else + movi(Vd, T, imm32 & 0xff, lsl); + imm32 >>= 8; lsl += 8; + while (imm32) { + while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + bici(Vd, T, imm32 & 0xff, lsl); + else + orri(Vd, T, imm32 & 0xff, lsl); + lsl += 8; imm32 >>= 8; + } +} + +void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) +{ +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64); + block_comment(buffer); + } +#endif + if (operand_valid_for_logical_immediate(false, imm64)) { + orr(dst, zr, imm64); + } else { + // we can use a combination of MOVZ or MOVN with + // MOVK to build up the constant + u_int64_t imm_h[4]; + int zero_count = 0; + int neg_count = 0; + int i; + for (i = 0; i < 4; i++) { + imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL); + if (imm_h[i] == 0) { + 
zero_count++; + } else if (imm_h[i] == 0xffffL) { + neg_count++; + } + } + if (zero_count == 4) { + // one MOVZ will do + movz(dst, 0); + } else if (neg_count == 4) { + // one MOVN will do + movn(dst, 0); + } else if (zero_count == 3) { + for (i = 0; i < 4; i++) { + if (imm_h[i] != 0L) { + movz(dst, (u_int32_t)imm_h[i], (i << 4)); + break; + } + } + } else if (neg_count == 3) { + // one MOVN will do + for (int i = 0; i < 4; i++) { + if (imm_h[i] != 0xffffL) { + movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4)); + break; + } + } + } else if (zero_count == 2) { + // one MOVZ and one MOVK will do + for (i = 0; i < 3; i++) { + if (imm_h[i] != 0L) { + movz(dst, (u_int32_t)imm_h[i], (i << 4)); + i++; + break; + } + } + for (;i < 4; i++) { + if (imm_h[i] != 0L) { + movk(dst, (u_int32_t)imm_h[i], (i << 4)); + } + } + } else if (neg_count == 2) { + // one MOVN and one MOVK will do + for (i = 0; i < 4; i++) { + if (imm_h[i] != 0xffffL) { + movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4)); + i++; + break; + } + } + for (;i < 4; i++) { + if (imm_h[i] != 0xffffL) { + movk(dst, (u_int32_t)imm_h[i], (i << 4)); + } + } + } else if (zero_count == 1) { + // one MOVZ and two MOVKs will do + for (i = 0; i < 4; i++) { + if (imm_h[i] != 0L) { + movz(dst, (u_int32_t)imm_h[i], (i << 4)); + i++; + break; + } + } + for (;i < 4; i++) { + if (imm_h[i] != 0x0L) { + movk(dst, (u_int32_t)imm_h[i], (i << 4)); + } + } + } else if (neg_count == 1) { + // one MOVN and two MOVKs will do + for (i = 0; i < 4; i++) { + if (imm_h[i] != 0xffffL) { + movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4)); + i++; + break; + } + } + for (;i < 4; i++) { + if (imm_h[i] != 0xffffL) { + movk(dst, (u_int32_t)imm_h[i], (i << 4)); + } + } + } else { + // use a MOVZ and 3 MOVKs (makes it easier to debug) + movz(dst, (u_int32_t)imm_h[0], 0); + for (i = 1; i < 4; i++) { + movk(dst, (u_int32_t)imm_h[i], (i << 4)); + } + } + } +} + +void MacroAssembler::mov_immediate32(Register dst, u_int32_t imm32) +{ +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%"PRIX32, imm32); + block_comment(buffer); + } +#endif + if (operand_valid_for_logical_immediate(true, imm32)) { + orrw(dst, zr, imm32); + } else { + // we can use MOVZ, MOVN or two calls to MOVK to build up the + // constant + u_int32_t imm_h[2]; + imm_h[0] = imm32 & 0xffff; + imm_h[1] = ((imm32 >> 16) & 0xffff); + if (imm_h[0] == 0) { + movzw(dst, imm_h[1], 16); + } else if (imm_h[0] == 0xffff) { + movnw(dst, imm_h[1] ^ 0xffff, 16); + } else if (imm_h[1] == 0) { + movzw(dst, imm_h[0], 0); + } else if (imm_h[1] == 0xffff) { + movnw(dst, imm_h[0] ^ 0xffff, 0); + } else { + // use a MOVZ and MOVK (makes it easier to debug) + movzw(dst, imm_h[0], 0); + movkw(dst, imm_h[1], 16); + } + } +} + +// Form an address from base + offset in Rd. Rd may or may +// not actually be used: you must use the Address that is returned. +// It is up to you to ensure that the shift provided matches the size +// of your data. 
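Before the addressing helper that follows, a minimal standalone illustration of the halfword bookkeeping in mov_immediate64/mov_immediate32 above: the constant is cut into 16-bit pieces, and counting the all-zero versus all-ones pieces decides whether to seed the register with MOVZ or MOVN before filling the remaining pieces with MOVK. Plain C++, not the MacroAssembler API; the variable names are illustrative only.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t imm = 0xffffffff0000cafeULL;   // sample constant
  uint16_t h[4];
  int zero_count = 0, neg_count = 0;
  for (int i = 0; i < 4; i++) {
    h[i] = (uint16_t)(imm >> (i * 16));   // 16-bit halfwords, low to high
    if (h[i] == 0x0000) zero_count++;
    if (h[i] == 0xffff) neg_count++;
  }
  // Mostly-zero halfwords favour MOVZ + MOVKs (unwritten pieces default to 0);
  // mostly-ones halfwords favour MOVN + MOVKs (unwritten pieces default to 1s).
  const char* seed = (neg_count > zero_count) ? "movn" : "movz";
  printf("halfwords %04x %04x %04x %04x -> seed with %s, fill with movk\n",
         (unsigned)h[0], (unsigned)h[1], (unsigned)h[2], (unsigned)h[3], seed);
  return 0;
}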
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) { + if (Address::offset_ok_for_immed(byte_offset, shift)) + // It fits; no need for any heroics + return Address(base, byte_offset); + + // Don't do anything clever with negative or misaligned offsets + unsigned mask = (1 << shift) - 1; + if (byte_offset < 0 || byte_offset & mask) { + mov(Rd, byte_offset); + add(Rd, base, Rd); + return Address(Rd); + } + + // See if we can do this with two 12-bit offsets + { + unsigned long word_offset = byte_offset >> shift; + unsigned long masked_offset = word_offset & 0xfff000; + if (Address::offset_ok_for_immed(word_offset - masked_offset) + && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) { + add(Rd, base, masked_offset << shift); + word_offset -= masked_offset; + return Address(Rd, word_offset << shift); + } + } + + // Do it the hard way + mov(Rd, byte_offset); + add(Rd, base, Rd); + return Address(Rd); +} + +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { + if (UseLSE) { + mov(tmp, 1); + ldadd(Assembler::word, tmp, zr, counter_addr); + return; + } + Label retry_load; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(counter_addr), PSTL1STRM); + bind(retry_load); + // flush and load exclusive from the memory location + ldxrw(tmp, counter_addr); + addw(tmp, tmp, 1); + // if we store+flush with no intervening write tmp wil be zero + stxrw(tmp2, tmp, counter_addr); + cbnzw(tmp2, retry_load); +} + + +int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb, + bool want_remainder, Register scratch) +{ + // Full implementation of Java idiv and irem. The function + // returns the (pc) offset of the div instruction - may be needed + // for implicit exceptions. + // + // constraint : ra/rb =/= scratch + // normal case + // + // input : ra: dividend + // rb: divisor + // + // result: either + // quotient (= ra idiv rb) + // remainder (= ra irem rb) + + assert(ra != scratch && rb != scratch, "reg cannot be scratch"); + + int idivl_offset = offset(); + if (! want_remainder) { + sdivw(result, ra, rb); + } else { + sdivw(scratch, ra, rb); + Assembler::msubw(result, scratch, rb, ra); + } + + return idivl_offset; +} + +int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb, + bool want_remainder, Register scratch) +{ + // Full implementation of Java ldiv and lrem. The function + // returns the (pc) offset of the div instruction - may be needed + // for implicit exceptions. + // + // constraint : ra/rb =/= scratch + // normal case + // + // input : ra: dividend + // rb: divisor + // + // result: either + // quotient (= ra idiv rb) + // remainder (= ra irem rb) + + assert(ra != scratch && rb != scratch, "reg cannot be scratch"); + + int idivq_offset = offset(); + if (! want_remainder) { + sdiv(result, ra, rb); + } else { + sdiv(scratch, ra, rb); + Assembler::msub(result, scratch, rb, ra); + } + + return idivq_offset; +} + +// MacroAssembler routines found actually to be needed + +void MacroAssembler::push(Register src) +{ + str(src, Address(pre(esp, -1 * wordSize))); +} + +void MacroAssembler::pop(Register dst) +{ + ldr(dst, Address(post(esp, 1 * wordSize))); +} + +// Note: load_unsigned_short used to be called load_unsigned_word. 
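Before the sized loads that follow, a quick standalone check (plain C++, not HotSpot code) of the remainder arithmetic used by corrected_idivl/corrected_idivq above: MSUB yields dividend - quotient*divisor, which matches Java irem/lrem for negative operands because SDIV, like C++ integer division, truncates toward zero.

#include <cassert>
#include <cstdint>

static int32_t java_irem(int32_t a, int32_t b) {
  int32_t q = a / b;   // sdivw: quotient rounded toward zero
  return a - q * b;    // msubw: dividend - quotient * divisor
}

int main() {
  assert(java_irem( 7,  3) ==  1);
  assert(java_irem(-7,  3) == -1);  // sign follows the dividend, as in Java
  assert(java_irem( 7, -3) ==  1);
  return 0;
}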
+int MacroAssembler::load_unsigned_short(Register dst, Address src) { + int off = offset(); + ldrh(dst, src); + return off; +} + +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { + int off = offset(); + ldrb(dst, src); + return off; +} + +int MacroAssembler::load_signed_short(Register dst, Address src) { + int off = offset(); + ldrsh(dst, src); + return off; +} + +int MacroAssembler::load_signed_byte(Register dst, Address src) { + int off = offset(); + ldrsb(dst, src); + return off; +} + +int MacroAssembler::load_signed_short32(Register dst, Address src) { + int off = offset(); + ldrshw(dst, src); + return off; +} + +int MacroAssembler::load_signed_byte32(Register dst, Address src) { + int off = offset(); + ldrsbw(dst, src); + return off; +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ldr(dst, src); break; + case 4: ldrw(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: str(src, dst); break; + case 4: strw(src, dst); break; + case 2: strh(src, dst); break; + case 1: strb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::decrementw(Register reg, int value) +{ + if (value < 0) { incrementw(reg, -value); return; } + if (value == 0) { return; } + if (value < (1 << 12)) { subw(reg, reg, value); return; } + /* else */ { + guarantee(reg != rscratch2, "invalid dst for register decrement"); + movw(rscratch2, (unsigned)value); + subw(reg, reg, rscratch2); + } +} + +void MacroAssembler::decrement(Register reg, int value) +{ + if (value < 0) { increment(reg, -value); return; } + if (value == 0) { return; } + if (value < (1 << 12)) { sub(reg, reg, value); return; } + /* else */ { + assert(reg != rscratch2, "invalid dst for register decrement"); + mov(rscratch2, (unsigned long)value); + sub(reg, reg, rscratch2); + } +} + +void MacroAssembler::decrementw(Address dst, int value) +{ + assert(!dst.uses(rscratch1), "invalid dst for address decrement"); + ldrw(rscratch1, dst); + decrementw(rscratch1, value); + strw(rscratch1, dst); +} + +void MacroAssembler::decrement(Address dst, int value) +{ + assert(!dst.uses(rscratch1), "invalid address for decrement"); + ldr(rscratch1, dst); + decrement(rscratch1, value); + str(rscratch1, dst); +} + +void MacroAssembler::incrementw(Register reg, int value) +{ + if (value < 0) { decrementw(reg, -value); return; } + if (value == 0) { return; } + if (value < (1 << 12)) { addw(reg, reg, value); return; } + /* else */ { + assert(reg != rscratch2, "invalid dst for register increment"); + movw(rscratch2, (unsigned)value); + addw(reg, reg, rscratch2); + } +} + +void MacroAssembler::increment(Register reg, int value) +{ + if (value < 0) { decrement(reg, -value); return; } + if (value == 0) { return; } + if (value < (1 << 12)) { add(reg, reg, value); return; } + /* else */ { + assert(reg != rscratch2, "invalid dst for register increment"); + movw(rscratch2, (unsigned)value); + add(reg, reg, rscratch2); + } +} + +void MacroAssembler::incrementw(Address dst, int value) +{ + assert(!dst.uses(rscratch1), "invalid dst for address increment"); + ldrw(rscratch1, dst); + incrementw(rscratch1, 
value); + strw(rscratch1, dst); +} + +void MacroAssembler::increment(Address dst, int value) +{ + assert(!dst.uses(rscratch1), "invalid dst for address increment"); + ldr(rscratch1, dst); + increment(rscratch1, value); + str(rscratch1, dst); +} + + +void MacroAssembler::pusha() { + push(0x7fffffff, sp); +} + +void MacroAssembler::popa() { + pop(0x7fffffff, sp); +} + +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push(unsigned int bitset, Register stack) { + int words_pushed = 0; + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 0; reg <= 30; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + regs[count++] = zr->encoding_nocheck(); + count &= ~1; // Only push an even nuber of regs + + if (count) { + stp(as_Register(regs[0]), as_Register(regs[1]), + Address(pre(stack, -count * wordSize))); + words_pushed += 2; + } + for (int i = 2; i < count; i += 2) { + stp(as_Register(regs[i]), as_Register(regs[i+1]), + Address(stack, i * wordSize)); + words_pushed += 2; + } + + assert(words_pushed == count, "oops, pushed != count"); + + return count; +} + +int MacroAssembler::pop(unsigned int bitset, Register stack) { + int words_pushed = 0; + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 0; reg <= 30; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + regs[count++] = zr->encoding_nocheck(); + count &= ~1; + + for (int i = 2; i < count; i += 2) { + ldp(as_Register(regs[i]), as_Register(regs[i+1]), + Address(stack, i * wordSize)); + words_pushed += 2; + } + if (count) { + ldp(as_Register(regs[0]), as_Register(regs[1]), + Address(post(stack, count * wordSize))); + words_pushed += 2; + } + + assert(words_pushed == count, "oops, pushed != count"); + + return count; +} +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { +#if 0 + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + if (CheckCompressedOops) { + Label ok; + push(1 << rscratch1->encoding(), sp); // cmpptr trashes rscratch1 + cmpptr(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + br(Assembler::EQ, ok); + stop(msg); + bind(ok); + pop(1 << rscratch1->encoding(), sp); + } +#endif +} +#endif + +void MacroAssembler::stop(const char* msg) { + address ip = pc(); + pusha(); + mov(c_rarg0, (address)msg); + mov(c_rarg1, (address)ip); + mov(c_rarg2, sp); + mov(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + blr(c_rarg3); + hlt(0); +} + +void MacroAssembler::warn(const char* msg) { + pusha(); + mov(c_rarg0, (address)msg); + mov(lr, CAST_FROM_FN_PTR(address, warning)); + blr(lr); + popa(); +} + +// If a constant does not fit in an immediate field, generate some +// number of MOV instructions and then perform the operation. 
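A self-contained sketch (plain C++, not HotSpot code) of how push(bitset, stack) and pop(bitset, stack) above turn a register bit set into stp/ldp pairs, padding with zr (encoded as 31 here) so the count is always even; register numbers are plain integers for illustration.

#include <cstdio>

int main() {
  unsigned bitset = (1u << 0) | (1u << 2) | (1u << 5);   // e.g. r0, r2, r5
  unsigned char regs[32];
  int count = 0;
  for (int reg = 0; reg <= 30; reg++) {
    if (bitset & 1) regs[count++] = (unsigned char)reg;  // collect requested regs
    bitset >>= 1;
  }
  regs[count++] = 31;   // pad with zr so a full pair can always be stored
  count &= ~1;          // keep an even number of slots, as the real code does
  for (int i = 0; i < count; i += 2)
    printf("pair %d: r%d with r%d\n", i / 2, regs[i], regs[i + 1]);
  return 0;
}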
+void MacroAssembler::wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm, + add_sub_imm_insn insn1, + add_sub_reg_insn insn2) { + assert(Rd != zr, "Rd = zr and not setting flags?"); + if (operand_valid_for_add_sub_immediate((int)imm)) { + (this->*insn1)(Rd, Rn, imm); + } else { + if (uabs(imm) < (1 << 24)) { + (this->*insn1)(Rd, Rn, imm & -(1 << 12)); + (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1)); + } else { + assert_different_registers(Rd, Rn); + mov(Rd, (uint64_t)imm); + (this->*insn2)(Rd, Rn, Rd, LSL, 0); + } + } +} + +// Seperate vsn which sets the flags. Optimisations are more restricted +// because we must set the flags correctly. +void MacroAssembler::wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm, + add_sub_imm_insn insn1, + add_sub_reg_insn insn2) { + if (operand_valid_for_add_sub_immediate((int)imm)) { + (this->*insn1)(Rd, Rn, imm); + } else { + assert_different_registers(Rd, Rn); + assert(Rd != zr, "overflow in immediate operand"); + mov(Rd, (uint64_t)imm); + (this->*insn2)(Rd, Rn, Rd, LSL, 0); + } +} + + +void MacroAssembler::add(Register Rd, Register Rn, RegisterOrConstant increment) { + if (increment.is_register()) { + add(Rd, Rn, increment.as_register()); + } else { + add(Rd, Rn, increment.as_constant()); + } +} + +void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment) { + if (increment.is_register()) { + addw(Rd, Rn, increment.as_register()); + } else { + addw(Rd, Rn, increment.as_constant()); + } +} + +void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + sub(Rd, Rn, decrement.as_register()); + } else { + sub(Rd, Rn, decrement.as_constant()); + } +} + +void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + subw(Rd, Rn, decrement.as_register()); + } else { + subw(Rd, Rn, decrement.as_constant()); + } +} + +void MacroAssembler::reinit_heapbase() +{ + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { + mov(rheapbase, Universe::narrow_ptrs_base()); + } else { + lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + ldr(rheapbase, Address(rheapbase)); + } + } +} + +// this simulates the behaviour of the x86 cmpxchg instruction using a +// load linked/store conditional pair. we use the acquire/release +// versions of these instructions so that we flush pending writes as +// per Java semantics. + +// n.b the x86 version assumes the old value to be compared against is +// in rax and updates rax with the value located in memory if the +// cmpxchg fails. we supply a register for the old value explicitly + +// the aarch64 load linked/store conditional instructions do not +// accept an offset. so, unlike x86, we must provide a plain register +// to identify the memory word to be compared/exchanged rather than a +// register+offset Address. 
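For reference, the contract the cmpxchgptr/cmpxchgw routines below provide can be stated in a few lines of ordinary C++. This is an illustrative sketch using std::atomic, not HotSpot code; the emitted ldaxr/stlxr retry loop implements the same compare-and-swap contract, with acquire/release ordering in place of the seq_cst default used here.

#include <atomic>
#include <cstdint>

// Compare the word at *addr with oldv, install newv if they match,
// otherwise hand back the value actually observed (as the "nope" path
// does with mov(oldv, tmp)).  The internal retry on spurious LL/SC
// failure corresponds to the b(retry_load) back-branch.
bool cmpxchgptr_model(std::atomic<intptr_t>* addr,
                      intptr_t& oldv,   // in: expected value, out: observed value on failure
                      intptr_t newv) {
  intptr_t expected = oldv;
  bool ok = addr->compare_exchange_strong(expected, newv);
  if (!ok) oldv = expected;
  return ok;                            // caller branches to the succeed/fail labels
}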
+ +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::xword, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxr(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxr(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } + if (fail) + b(*fail); +} + +void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update + // tmp returns 0/1 for success/failure + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::word, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxrw(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxrw(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } + if (fail) + b(*fail); +} + +// A generic CAS; success or failure is in the EQ flag. +void MacroAssembler::cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, + bool acquire, bool release, + Register tmp) { + if (UseLSE) { + mov(tmp, expected); + lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true); + cmp(tmp, expected); + } else { + BLOCK_COMMENT("cmpxchg {"); + Label retry_load, done; + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) + prfm(Address(addr), PSTL1STRM); + bind(retry_load); + load_exclusive(tmp, addr, size, acquire); + if (size == xword) + cmp(tmp, expected); + else + cmpw(tmp, expected); + br(Assembler::NE, done); + store_exclusive(tmp, new_val, addr, size, release); + cbnzw(tmp, retry_load); + bind(done); + BLOCK_COMMENT("} cmpxchg"); + } +} + +static bool different(Register a, RegisterOrConstant b, Register c) { + if (b.is_constant()) + return a != c; + else + return a != b.as_register() && a != c && b.as_register() != c; +} + +#define ATOMIC_OP(NAME, LDXR, OP, IOP, AOP, STXR, sz) \ +void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ + if (UseLSE) { \ + prev = prev->is_valid() ? 
prev : zr; \ + if (incr.is_register()) { \ + AOP(sz, incr.as_register(), prev, addr); \ + } else { \ + mov(rscratch2, incr.as_constant()); \ + AOP(sz, rscratch2, prev, addr); \ + } \ + return; \ + } \ + Register result = rscratch2; \ + if (prev->is_valid()) \ + result = different(prev, incr, addr) ? prev : rscratch2; \ + \ + Label retry_load; \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) \ + prfm(Address(addr), PSTL1STRM); \ + bind(retry_load); \ + LDXR(result, addr); \ + OP(rscratch1, result, incr); \ + STXR(rscratch2, rscratch1, addr); \ + cbnzw(rscratch2, retry_load); \ + if (prev->is_valid() && prev != result) { \ + IOP(prev, rscratch1, incr); \ + } \ +} + +ATOMIC_OP(add, ldxr, add, sub, ldadd, stxr, Assembler::xword) +ATOMIC_OP(addw, ldxrw, addw, subw, ldadd, stxrw, Assembler::word) +ATOMIC_OP(addal, ldaxr, add, sub, ldaddal, stlxr, Assembler::xword) +ATOMIC_OP(addalw, ldaxrw, addw, subw, ldaddal, stlxrw, Assembler::word) + +#undef ATOMIC_OP + +#define ATOMIC_XCHG(OP, AOP, LDXR, STXR, sz) \ +void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ + if (UseLSE) { \ + prev = prev->is_valid() ? prev : zr; \ + AOP(sz, newv, prev, addr); \ + return; \ + } \ + Register result = rscratch2; \ + if (prev->is_valid()) \ + result = different(prev, newv, addr) ? prev : rscratch2; \ + \ + Label retry_load; \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_STXR_PREFETCH)) \ + prfm(Address(addr), PSTL1STRM); \ + bind(retry_load); \ + LDXR(result, addr); \ + STXR(rscratch1, newv, addr); \ + cbnzw(rscratch1, retry_load); \ + if (prev->is_valid() && prev != result) \ + mov(prev, result); \ +} + +ATOMIC_XCHG(xchg, swp, ldxr, stxr, Assembler::xword) +ATOMIC_XCHG(xchgw, swp, ldxrw, stxrw, Assembler::word) +ATOMIC_XCHG(xchgal, swpal, ldaxr, stlxr, Assembler::xword) +ATOMIC_XCHG(xchgalw, swpal, ldaxrw, stlxrw, Assembler::word) + +#undef ATOMIC_XCHG + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { + thread = rthread; + } + assert(t1->is_valid(), "need temp reg"); + + ldr(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) { + add(t1, t1, var_size_in_bytes); + } else { + add(t1, t1, con_size_in_bytes); + } + str(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) +{ + // In order to get locks to work, we need to fake a in_VM state + if (ShowMessageBoxOnError ) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); +#ifndef PRODUCT + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + ttyLocker ttyl; + BytecodeCounter::print(); + } +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; + tty->print_cr(" pc = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif + tty->print_cr(" r0 = 0x%016lx", regs[0]); + tty->print_cr(" r1 = 0x%016lx", regs[1]); + tty->print_cr(" r2 = 0x%016lx", regs[2]); + tty->print_cr(" r3 = 0x%016lx", regs[3]); + tty->print_cr(" r4 = 0x%016lx", regs[4]); + tty->print_cr(" r5 = 0x%016lx", regs[5]); + tty->print_cr(" r6 = 0x%016lx", regs[6]); + tty->print_cr(" r7 = 0x%016lx", regs[7]); + tty->print_cr(" r8 = 0x%016lx", regs[8]); + 
tty->print_cr(" r9 = 0x%016lx", regs[9]); + tty->print_cr("r10 = 0x%016lx", regs[10]); + tty->print_cr("r11 = 0x%016lx", regs[11]); + tty->print_cr("r12 = 0x%016lx", regs[12]); + tty->print_cr("r13 = 0x%016lx", regs[13]); + tty->print_cr("r14 = 0x%016lx", regs[14]); + tty->print_cr("r15 = 0x%016lx", regs[15]); + tty->print_cr("r16 = 0x%016lx", regs[16]); + tty->print_cr("r17 = 0x%016lx", regs[17]); + tty->print_cr("r18 = 0x%016lx", regs[18]); + tty->print_cr("r19 = 0x%016lx", regs[19]); + tty->print_cr("r20 = 0x%016lx", regs[20]); + tty->print_cr("r21 = 0x%016lx", regs[21]); + tty->print_cr("r22 = 0x%016lx", regs[22]); + tty->print_cr("r23 = 0x%016lx", regs[23]); + tty->print_cr("r24 = 0x%016lx", regs[24]); + tty->print_cr("r25 = 0x%016lx", regs[25]); + tty->print_cr("r26 = 0x%016lx", regs[26]); + tty->print_cr("r27 = 0x%016lx", regs[27]); + tty->print_cr("r28 = 0x%016lx", regs[28]); + tty->print_cr("r30 = 0x%016lx", regs[30]); + tty->print_cr("r31 = 0x%016lx", regs[31]); + BREAKPOINT; + } + ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); + } else { + ttyLocker ttyl; + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", + msg); + assert(false, err_msg("DEBUG MESSAGE: %s", msg)); + } +} + +void MacroAssembler::push_call_clobbered_registers() { + push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); + + // Push v0-v7, v16-v31. + for (int i = 30; i >= 0; i -= 2) { + if (i <= v7->encoding() || i >= v16->encoding()) { + stpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -2 * wordSize))); + } + } +} + +void MacroAssembler::pop_call_clobbered_registers() { + + for (int i = 0; i < 32; i += 2) { + if (i <= v7->encoding() || i >= v16->encoding()) { + ldpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 2 * wordSize))); + } + } + + pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); +} + +void MacroAssembler::push_CPU_state(bool save_vectors) { + push(0x3fffffff, sp); // integer registers except lr & sp + + if (!save_vectors) { + for (int i = 30; i >= 0; i -= 2) + stpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -2 * wordSize))); + } else { + for (int i = 30; i >= 0; i -= 2) + stpq(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -4 * wordSize))); + } +} + +void MacroAssembler::pop_CPU_state(bool restore_vectors) { + if (!restore_vectors) { + for (int i = 0; i < 32; i += 2) + ldpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 2 * wordSize))); + } else { + for (int i = 0; i < 32; i += 2) + ldpq(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 4 * wordSize))); + } + + pop(0x3fffffff, sp); // integer registers except lr & sp +} + +/** + * Helpers for multiply_to_len(). + */ +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2) { + adds(dest_lo, dest_lo, src1); + adc(dest_hi, dest_hi, zr); + adds(dest_lo, dest_lo, src2); + adc(final_dest_hi, dest_hi, zr); +} + +// Generate an address from (r + r1 extend offset). "size" is the +// size of the operand. The result may be in rscratch2. 
+Address MacroAssembler::offsetted_address(Register r, Register r1, + Address::extend ext, int offset, int size) { + if (offset || (ext.shift() % size != 0)) { + lea(rscratch2, Address(r, r1, ext)); + return Address(rscratch2, offset); + } else { + return Address(r, r1, ext); + } +} + +Address MacroAssembler::spill_address(int size, int offset, Register tmp) +{ + assert(offset >= 0, "spill to negative address?"); + // Offset reachable ? + // Not aligned - 9 bits signed offset + // Aligned - 12 bits unsigned offset shifted + Register base = sp; + if ((offset & (size-1)) && offset >= (1<<8)) { + add(tmp, base, offset & ((1<<12)-1)); + base = tmp; + offset &= -1u<<12; + } + + if (offset >= (1<<12) * size) { + add(tmp, base, offset & (((1<<12)-1)<<12)); + base = tmp; + offset &= ~(((1<<12)-1)<<12); + } + + return Address(base, offset); +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) { + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + subsw(xstart, xstart, 1); + br(Assembler::MI, L_one_x); + + lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt))); + ldr(x_xstart, Address(rscratch1)); + ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + subsw(idx, idx, 1); + br(Assembler::MI, L_first_loop_exit); + subsw(idx, idx, 1); + br(Assembler::MI, L_one_y); + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(y_idx, Address(rscratch1)); + ror(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); + + // AArch64 has a multiply-accumulate instruction that we can't use + // here because it has no way to process carries, so we have to use + // separate add and adc instructions. Bah. + umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product + mul(product, x_xstart, y_idx); + adds(product, product, carry); + adc(carry, rscratch1, zr); // x_xstart * y_idx + carry -> carry:product + + subw(kdx, kdx, 2); + ror(product, product, 32); // back to big-endian + str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong)); + + b(L_first_loop); + + bind(L_one_y); + ldrw(y_idx, Address(y, 0)); + b(L_multiply); + + bind(L_one_x); + ldrw(x_xstart, Address(x, 0)); + b(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128. Unrolled inner loop. 
+ * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { + + // jlong carry, x[], y[], z[]; + // int kdx = ystart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + lsrw(jdx, idx, 2); + + bind(L_third_loop); + + subsw(jdx, jdx, 1); + br(Assembler::MI, L_third_loop_exit); + subw(idx, idx, 4); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + + ldp(yz_idx2, yz_idx1, Address(rscratch1, 0)); + + lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror(yz_idx2, yz_idx2, 32); + + ldp(rscratch2, rscratch1, Address(tmp6, 0)); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + + ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian + ror(rscratch2, rscratch2, 32); + + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + umulh(carry2, product_hi, yz_idx2); + + // propagate sum of both multiplications into carry:tmp4:tmp3 + adds(tmp3, tmp3, carry); + adc(tmp4, tmp4, zr); + adds(tmp3, tmp3, rscratch1); + adcs(tmp4, tmp4, tmp); + adc(carry, carry2, zr); + adds(tmp4, tmp4, rscratch2); + adc(carry, carry, zr); + + ror(tmp3, tmp3, 32); // convert little-endian to big-endian + ror(tmp4, tmp4, 32); + stp(tmp4, tmp3, Address(tmp6, 0)); + + b(L_third_loop); + bind (L_third_loop_exit); + + andw (idx, idx, 0x3); + cbz(idx, L_post_third_loop_done); + + Label L_check_1; + subsw(idx, idx, 2); + br(Assembler::MI, L_check_1); + + lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx1, Address(rscratch1, 0)); + ror(yz_idx1, yz_idx1, 32); + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + umulh(tmp4, product_hi, yz_idx1); + lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt))); + ldr(yz_idx2, Address(rscratch1, 0)); + ror(yz_idx2, yz_idx2, 32); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2); + + ror(tmp3, tmp3, 32); + str(tmp3, Address(rscratch1, 0)); + + bind (L_check_1); + + andw (idx, idx, 0x1); + subsw(idx, idx, 1); + br(Assembler::MI, L_post_third_loop_done); + ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt))); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + umulh(carry2, tmp4, product_hi); + ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt))); + + add2_with_carry(carry2, tmp3, tmp4, carry); + + strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt))); + extr(carry, carry2, tmp3, 32); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() instrinsic. 
+ * + * r0: x + * r1: xlen + * r2: y + * r3: ylen + * r4: z + * r5: zlen + * r10: tmp1 + * r11: tmp2 + * r12: tmp3 + * r13: tmp4 + * r14: tmp5 + * r15: tmp6 + * r16: tmp7 + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) { + + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + // First Loop. + // + // final static long LONG_MASK = 0xffffffffL; + // int xstart = xlen - 1; + // int ystart = ylen - 1; + // long carry = 0; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { + // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + // + + movw(idx, ylen); // idx = ylen; + movw(kdx, zlen); // kdx = xlen+ylen; + mov(carry, zr); // carry = 0; + + Label L_done; + + movw(xstart, xlen); + subsw(xstart, xstart, 1); + br(Assembler::MI, L_done); + + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop; + cbzw(kdx, L_second_loop); + + Label L_carry; + subw(kdx, kdx, 1); + cbzw(kdx, L_carry); + + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + lsr(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt))); + + // Second and third (nested) loops. + // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + const Register jdx = tmp1; + + bind(L_second_loop); + mov(carry, zr); // carry = 0; + movw(jdx, ylen); // j = ystart+1 + + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_done); + + str(z, Address(pre(sp, -4 * wordSize))); + + Label L_last_x; + lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j + subsw(xstart, xstart, 1); // i = xstart-1; + br(Assembler::MI, L_last_x); + + lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt))); + ldr(product_hi, Address(rscratch1)); + ror(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + str(ylen, Address(sp, wordSize)); + stp(x, xstart, Address(sp, 2 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ldp(z, ylen, Address(post(sp, 2 * wordSize))); + ldp(x, xlen, Address(post(sp, 2 * wordSize))); // copy old xstart -> xlen + + addw(tmp3, xlen, 1); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + subsw(tmp3, tmp3, 1); + br(Assembler::MI, L_done); + + lsr(carry, carry, 32); + strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); + b(L_second_loop); + + // Next infrequent code is moved outside loops. 
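For orientation, the Java-style pseudocode in the comments above corresponds to the ordinary schoolbook multiplication sketched below. This is a plain C++ reference for the loop structure only (big-endian 32-bit limbs, most significant first, as in BigInteger.multiplyToLen()); it is not the intrinsic itself.

#include <cstdint>
#include <vector>

// 64-bit product per limb pair, 32-bit store, carry = product >> 32.
static std::vector<uint32_t> multiply_to_len_model(const std::vector<uint32_t>& x,
                                                   const std::vector<uint32_t>& y) {
  std::vector<uint32_t> z(x.size() + y.size(), 0);
  for (int i = (int)x.size() - 1; i >= 0; i--) {
    uint64_t carry = 0;
    for (int j = (int)y.size() - 1, k = i + j + 1; j >= 0; j--, k--) {
      uint64_t product = (uint64_t)y[j] * x[i] + z[k] + carry;
      z[k]  = (uint32_t)product;
      carry = product >> 32;
    }
    z[i] = (uint32_t)carry;
  }
  return z;
}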
+ bind(L_last_x); + ldrw(product_hi, Address(x, 0)); + b(L_third_loop_prologue); + + bind(L_done); +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + * + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + eor(val, val, crc); + andr(val, val, 0xff); + ldrw(val, Address(table, val, Address::lsl(2))); + eor(crc, val, crc, Assembler::LSR, 8); +} + +/** + * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 + * + * @param [in,out]crc Register containing the crc. + * @param [in]v Register containing the 32-bit to fold into the CRC. + * @param [in]table0 Register containing table 0 of crc constants. + * @param [in]table1 Register containing table 1 of crc constants. + * @param [in]table2 Register containing table 2 of crc constants. + * @param [in]table3 Register containing table 3 of crc constants. + * + * uint32_t crc; + * v = crc ^ v + * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] + * + */ +void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, + Register table0, Register table1, Register table2, Register table3, + bool upper) { + eor(v, crc, v, upper ? LSR:LSL, upper ? 32:0); + uxtb(tmp, v); + ldrw(crc, Address(table3, tmp, Address::lsl(2))); + ubfx(tmp, v, 8, 8); + ldrw(tmp, Address(table2, tmp, Address::lsl(2))); + eor(crc, crc, tmp); + ubfx(tmp, v, 16, 8); + ldrw(tmp, Address(table1, tmp, Address::lsl(2))); + eor(crc, crc, tmp); + ubfx(tmp, v, 24, 8); + ldrw(tmp, Address(table0, tmp, Address::lsl(2))); + eor(crc, crc, tmp); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register that will contain address of CRC table + * @param tmp scratch register + */ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3) { + Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit; + unsigned long offset; + + ornw(crc, zr, crc); + + if (UseCRC32) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop; + + subs(len, len, 64); + br(Assembler::GE, CRC_by64_loop); + adds(len, len, 64-4); + br(Assembler::GE, CRC_by4_loop); + adds(len, len, 4); + br(Assembler::GT, CRC_by1_loop); + b(L_exit); + + BIND(CRC_by4_loop); + ldrw(tmp, Address(post(buf, 4))); + subs(len, len, 4); + crc32w(crc, crc, tmp); + br(Assembler::GE, CRC_by4_loop); + adds(len, len, 4); + br(Assembler::LE, L_exit); + BIND(CRC_by1_loop); + ldrb(tmp, Address(post(buf, 1))); + subs(len, len, 1); + crc32b(crc, crc, tmp); + br(Assembler::GT, CRC_by1_loop); + b(L_exit); + + align(CodeEntryAlignment); + BIND(CRC_by64_loop); + subs(len, len, 64); + ldp(tmp, tmp3, Address(post(buf, 16))); + crc32x(crc, crc, tmp); + crc32x(crc, crc, tmp3); + ldp(tmp, tmp3, Address(post(buf, 16))); + crc32x(crc, crc, tmp); + crc32x(crc, crc, tmp3); + ldp(tmp, tmp3, Address(post(buf, 16))); + crc32x(crc, crc, tmp); + crc32x(crc, crc, tmp3); + ldp(tmp, tmp3, Address(post(buf, 16))); + crc32x(crc, crc, tmp); + crc32x(crc, crc, tmp3); + 
br(Assembler::GE, CRC_by64_loop); + adds(len, len, 64-4); + br(Assembler::GE, CRC_by4_loop); + adds(len, len, 4); + br(Assembler::GT, CRC_by1_loop); + BIND(L_exit); + ornw(crc, zr, crc); + return; + } + + adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset); + if (offset) add(table0, table0, offset); + add(table1, table0, 1*256*sizeof(juint)); + add(table2, table0, 2*256*sizeof(juint)); + add(table3, table0, 3*256*sizeof(juint)); + + if (UseNeon) { + cmp(len, 64); + br(Assembler::LT, L_by16); + eor(v16, T16B, v16, v16); + + Label L_fold; + + add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants + + ld1(v0, v1, T2D, post(buf, 32)); + ld1r(v4, T2D, post(tmp, 8)); + ld1r(v5, T2D, post(tmp, 8)); + ld1r(v6, T2D, post(tmp, 8)); + ld1r(v7, T2D, post(tmp, 8)); + mov(v16, T4S, 0, crc); + + eor(v0, T16B, v0, v16); + sub(len, len, 64); + + BIND(L_fold); + pmull(v22, T8H, v0, v5, T8B); + pmull(v20, T8H, v0, v7, T8B); + pmull(v23, T8H, v0, v4, T8B); + pmull(v21, T8H, v0, v6, T8B); + + pmull2(v18, T8H, v0, v5, T16B); + pmull2(v16, T8H, v0, v7, T16B); + pmull2(v19, T8H, v0, v4, T16B); + pmull2(v17, T8H, v0, v6, T16B); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v17, T16B, v17, v21); + + ushll2(v20, T2D, v17, T4S, 16); + ushll(v16, T2D, v17, T2S, 16); + + eor(v20, T16B, v20, v22); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v28, T16B, v17, v21); + + pmull(v22, T8H, v1, v5, T8B); + pmull(v20, T8H, v1, v7, T8B); + pmull(v23, T8H, v1, v4, T8B); + pmull(v21, T8H, v1, v6, T8B); + + pmull2(v18, T8H, v1, v5, T16B); + pmull2(v16, T8H, v1, v7, T16B); + pmull2(v19, T8H, v1, v4, T16B); + pmull2(v17, T8H, v1, v6, T16B); + + ld1(v0, v1, T2D, post(buf, 32)); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v16, T16B, v17, v21); + + ushll2(v20, T2D, v16, T4S, 16); + ushll(v16, T2D, v16, T2S, 16); + + eor(v20, T16B, v22, v20); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v20, T16B, v17, v21); + + shl(v16, T2D, v28, 1); + shl(v17, T2D, v20, 1); + + eor(v0, T16B, v0, v16); + eor(v1, T16B, v1, v17); + + subs(len, len, 32); + br(Assembler::GE, L_fold); + + mov(crc, 0); + mov(tmp, v0, T1D, 0); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); + mov(tmp, v0, T1D, 1); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); + mov(tmp, v1, T1D, 0); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp, 
tmp2, table0, table1, table2, table3, true); + mov(tmp, v1, T1D, 1); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); + + add(len, len, 32); + } + + BIND(L_by16); + subs(len, len, 16); + br(Assembler::GE, L_by16_loop); + adds(len, len, 16-4); + br(Assembler::GE, L_by4_loop); + adds(len, len, 4); + br(Assembler::GT, L_by1_loop); + b(L_exit); + + BIND(L_by4_loop); + ldrw(tmp, Address(post(buf, 4))); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3); + subs(len, len, 4); + br(Assembler::GE, L_by4_loop); + adds(len, len, 4); + br(Assembler::LE, L_exit); + BIND(L_by1_loop); + subs(len, len, 1); + ldrb(tmp, Address(post(buf, 1))); + update_byte_crc32(crc, tmp, table0); + br(Assembler::GT, L_by1_loop); + b(L_exit); + + align(CodeEntryAlignment); + BIND(L_by16_loop); + subs(len, len, 16); + ldp(tmp, tmp3, Address(post(buf, 16))); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); + update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, false); + update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, true); + br(Assembler::GE, L_by16_loop); + adds(len, len, 16-4); + br(Assembler::GE, L_by4_loop); + adds(len, len, 4); + br(Assembler::GT, L_by1_loop); + BIND(L_exit); + ornw(crc, zr, crc); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + unsigned long offset; + _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset); + _masm->ldrb(rscratch1, Address(rscratch1, offset)); + _masm->cbzw(rscratch1, _label); +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +void MacroAssembler::addptr(const Address &dst, int32_t src) { + Address adr; + switch(dst.getMode()) { + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below. + adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord); + break; + default: + lea(rscratch2, dst); + adr = Address(rscratch2); + break; + } + ldr(rscratch1, adr); + add(rscratch1, rscratch1, src); + str(rscratch1, adr); +} + +void MacroAssembler::cmpptr(Register src1, Address src2) { + unsigned long offset; + adrp(rscratch1, src2, offset); + ldr(rscratch1, Address(rscratch1, offset)); + cmp(src1, rscratch1); +} + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. 
+ store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + lsr(obj, obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + // The calculation for byte_map_base is as follows: + // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); + // So this essentially converts an address to a displacement and + // it will never need to be relocated. + + // FIXME: It's not likely that disp will fit into an offset so we + // don't bother to check, but it could save an instruction. + intptr_t disp = (intptr_t) ct->byte_map_base; + load_byte_map_base(rscratch1); + + if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { + membar(StoreStore); + } + strb(zr, Address(obj, rscratch1)); +} + +void MacroAssembler::load_klass(Register dst, Register src) { + if (UseCompressedClassPointers) { + ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { + if (UseCompressedClassPointers) { + ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + if (Universe::narrow_klass_base() == NULL) { + cmp(trial_klass, tmp, LSL, Universe::narrow_klass_shift()); + return; + } else if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + // Only the bottom 32 bits matter + cmpw(trial_klass, tmp); + return; + } + decode_klass_not_null(tmp); + } else { + ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } + cmp(trial_klass, tmp); +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ldr(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + // FIXME: Should this be a store release? concurrent gcs assumes + // klass length is valid if klass field is not null. + if (UseCompressedClassPointers) { + encode_klass_not_null(src); + strw(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + str(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + // Store to klass gap in destination + strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); + } +} + +// Algorithm must match oop.inline.hpp encode_heap_oop. 
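The encode/decode routines that follow are base-and-shift arithmetic on heap addresses. A stand-alone sketch of the transformation, with illustrative names (heap_base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift()); this is not HotSpot code, and the real routines fold the NULL case into the subs/csel and cbz sequences shown below them.

#include <cstdint>

static uint32_t encode_heap_oop_model(uint64_t addr, uint64_t heap_base, unsigned shift) {
  if (addr == 0) return 0;                         // NULL stays NULL
  return (uint32_t)((addr - heap_base) >> shift);  // subtract base, scale down by alignment
}

static uint64_t decode_heap_oop_model(uint32_t narrow, uint64_t heap_base, unsigned shift) {
  if (narrow == 0) return 0;                       // cbz(s, done) path
  return heap_base + ((uint64_t)narrow << shift);  // add(d, rheapbase, s, LSL, shift)
}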
+void MacroAssembler::encode_heap_oop(Register d, Register s) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); +#endif + verify_oop(s, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + lsr(d, s, LogMinObjAlignmentInBytes); + } else { + mov(d, s); + } + } else { + subs(d, s, rheapbase); + csel(d, d, zr, Assembler::HS); + lsr(d, d, LogMinObjAlignmentInBytes); + + /* Old algorithm: is this any worse? + Label nonnull; + cbnz(r, nonnull); + sub(r, r, rheapbase); + bind(nonnull); + lsr(r, r, LogMinObjAlignmentInBytes); + */ + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + cbnz(r, ok); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + sub(r, r, rheapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + lsr(r, r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + cbnz(src, ok); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + + Register data = src; + if (Universe::narrow_oop_base() != NULL) { + sub(dst, src, rheapbase); + data = dst; + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + lsr(dst, data, LogMinObjAlignmentInBytes); + data = dst; + } + if (data == src) + mov(dst, src); +} + +void MacroAssembler::decode_heap_oop(Register d, Register s) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0 || d != s) { + lsl(d, s, Universe::narrow_oop_shift()); + } + } else { + Label done; + if (d != s) + mov(d, s); + cbz(s, done); + add(d, rheapbase, s, Assembler::LSL, LogMinObjAlignmentInBytes); + bind(done); + } + verify_oop(d, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + add(r, rheapbase, r, Assembler::LSL, LogMinObjAlignmentInBytes); + } else { + add(r, zr, r, Assembler::LSL, LogMinObjAlignmentInBytes); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + add(dst, rheapbase, src, Assembler::LSL, LogMinObjAlignmentInBytes); + } else { + add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + mov(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (Universe::narrow_klass_base() == NULL) { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + lsr(dst, src, LogKlassAlignmentInBytes); + } else { + if (dst != src) mov(dst, src); + } + return; + } + + if (use_XOR_for_compressed_class_base) { + if (Universe::narrow_klass_shift() != 0) { + eor(dst, src, (uint64_t)Universe::narrow_klass_base()); + lsr(dst, dst, LogKlassAlignmentInBytes); + } else { + eor(dst, src, (uint64_t)Universe::narrow_klass_base()); + } + return; + } + + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + movw(dst, src); + return; + } + +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); +#endif + + Register rbase = dst; + if (dst == src) rbase = rheapbase; + mov(rbase, (uint64_t)Universe::narrow_klass_base()); + sub(dst, src, rbase); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + lsr(dst, dst, LogKlassAlignmentInBytes); + } + if (dst == src) reinit_heapbase(); +} + +void MacroAssembler::encode_klass_not_null(Register r) { + encode_klass_not_null(r, r); +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + Register rbase = dst; + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + + if (Universe::narrow_klass_base() == NULL) { + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + lsl(dst, src, LogKlassAlignmentInBytes); + } else { + if (dst != src) mov(dst, src); + } + return; + } + + if (use_XOR_for_compressed_class_base) { + if (Universe::narrow_klass_shift() != 0) { + lsl(dst, src, LogKlassAlignmentInBytes); + eor(dst, dst, (uint64_t)Universe::narrow_klass_base()); + } else { + eor(dst, src, (uint64_t)Universe::narrow_klass_base()); + } + return; + } + + if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 + && Universe::narrow_klass_shift() == 0) { + if (dst != src) + movw(dst, src); + movk(dst, 
(uint64_t)Universe::narrow_klass_base() >> 32, 32); + return; + } + + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (dst == src) rbase = rheapbase; + mov(rbase, (uint64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + add(dst, rbase, src, Assembler::LSL, LogKlassAlignmentInBytes); + } else { + add(dst, rbase, src); + } + if (dst == src) reinit_heapbase(); +} + +void MacroAssembler::decode_klass_not_null(Register r) { + decode_klass_not_null(r, r); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + + InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + code_section()->relocate(inst_mark(), rspec); + movz(dst, 0xDEAD, 16); + movk(dst, 0xBEEF); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); + assert(! Universe::heap()->is_in_reserved(k), "should not be an oop"); + + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + code_section()->relocate(inst_mark(), rspec); + narrowKlass nk = Klass::encode_klass(k); + movz(dst, (nk >> 16), 16); + movk(dst, nk & 0xffff); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) +{ + if (UseCompressedOops) { + ldrw(dst, src); + decode_heap_oop(dst); + } else { + ldr(dst, src); + } +} + +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) +{ + if (UseCompressedOops) { + ldrw(dst, src); + decode_heap_oop_not_null(dst); + } else { + ldr(dst, src); + } +} + +void MacroAssembler::store_heap_oop(Address dst, Register src) { + if (UseCompressedOops) { + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + strw(src, dst); + } else + str(src, dst); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { + if (UseCompressedOops) { + strw(zr, dst); + } else + str(zr, dst); +} + +#if INCLUDE_ALL_GCS +/* + * g1_write_barrier_pre -- G1GC pre-write barrier for store of new_val at + * store_addr. + * + * Allocates rscratch1 + */ +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
+
+#ifdef _LP64
+  assert(thread == rthread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert_different_registers(obj, pre_val, tmp, rscratch1);
+  assert(pre_val != noreg && tmp != noreg, "expecting a register");
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                 PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                  PtrQueue::byte_offset_of_buf()));
+
+
+  // Is marking active?
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ldrw(tmp, in_progress);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    ldrb(tmp, in_progress);
+  }
+  cbzw(tmp, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  ldr(tmp, index);            // tmp := *index_adr
+  cbz(tmp, runtime);          // tmp == 0? If yes, goto runtime
+
+  sub(tmp, tmp, wordSize);    // tmp := tmp - wordSize
+  str(tmp, index);            // *index_adr := tmp
+  ldr(rscratch1, buffer);
+  add(tmp, tmp, rscratch1);   // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  str(pre_val, Address(tmp, 0));
+  b(done);
+
+  bind(runtime);
+  // save the live input values
+  push(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then rfp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
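In outline, the pre-barrier above records the value being overwritten while concurrent marking is active, preferring the thread-local SATB queue over the runtime slow path. The sketch below is an illustrative C++ model of that control flow only; the struct fields are stand-ins for the satb_mark_queue offsets used above, and it is not HotSpot code.

#include <cstddef>

struct SatbQueueModel {
  bool    active;   // byte_offset_of_active(): is marking active?
  size_t  index;    // byte index of the next free slot, 0 means the buffer is full
  void**  buffer;   // base of the thread-local SATB buffer
};

static void g1_pre_barrier_model(SatbQueueModel& q, void* pre_val,
                                 void (*runtime_enqueue)(void*)) {
  if (!q.active) return;            // cbzw(tmp, done)
  if (pre_val == nullptr) return;   // cbz(pre_val, done)
  if (q.index == 0) {               // cbz(tmp, runtime)
    runtime_enqueue(pre_val);       // call_VM_leaf(SharedRuntime::g1_wb_pre, ...)
    return;
  }
  q.index -= sizeof(void*);                     // sub(tmp, tmp, wordSize)
  q.buffer[q.index / sizeof(void*)] = pre_val;  // str(pre_val, Address(tmp, 0))
}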
+ + if (expand_call) { + LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) + pass_arg1(this, thread); + pass_arg0(this, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp); + + bind(done); +} + +/* + * g1_write_barrier_post -- G1GC post-write barrier for store of new_val at + * store_addr + * + * Allocates rscratch1 + */ +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { +#ifdef _LP64 + assert(thread == rthread, "must be"); +#endif // _LP64 + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + rscratch1); + assert(store_addr != noreg && new_val != noreg && tmp != noreg + && tmp2 != noreg, "expecting a register"); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + + eor(tmp, store_addr, new_val); + lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes); + cbz(tmp, done); + + // crosses regions, storing NULL? + + cbz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + + ExternalAddress cardtable((address) ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + lsr(card_addr, store_addr, CardTableModRefBS::card_shift); + + // get the address of the card + load_byte_map_base(tmp2); + add(card_addr, card_addr, tmp2); + ldrb(tmp2, Address(card_addr)); + cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + br(Assembler::EQ, done); + + assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); + + membar(Assembler::Assembler::StoreLoad); + + ldrb(tmp2, Address(card_addr)); + cbzw(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + strb(zr, Address(card_addr)); + + ldr(rscratch1, queue_index); + cbz(rscratch1, runtime); + sub(rscratch1, rscratch1, wordSize); + str(rscratch1, queue_index); + + ldr(tmp2, buffer); + str(card_addr, Address(tmp2, rscratch1)); + b(done); + + bind(runtime); + // save the live input values + push(store_addr->bit(true) | new_val->bit(true), sp); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + pop(store_addr->bit(true) | new_val->bit(true), sp); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS + +Address MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return Address((address)obj, rspec); +} + +// Move an oop into a register. immediate is true if we want +// immediate instrcutions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. 
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_oop_index(obj); + } else { + oop_index = oop_recorder()->find_index(obj); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); + if (! immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ldr_constant(dst, Address(dummy, rspec)); + } else + mov(dst, Address((address)obj, rspec)); +} + +// Move a metadata address into a register. +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } else { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = metadata_Relocation::spec(oop_index); + mov(dst, Address((address)obj, rspec)); +} + +Address MacroAssembler::constant_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop"); + int oop_index = oop_recorder()->find_index(obj); + return Address((address)obj, oop_Relocation::spec(oop_index)); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + assert_different_registers(obj, t2); + assert_different_registers(obj, var_size_in_bytes); + Register end = t2; + + // verify_tlab(); + + ldr(obj, Address(rthread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes)); + } + ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset())); + cmp(end, rscratch1); + br(Assembler::HI, slow_case); + + // update the tlab top pointer + str(end, Address(rthread, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + sub(var_size_in_bytes, var_size_in_bytes, obj); + } + // verify_tlab(); +} + +// Preserves r19, and r3. +Register MacroAssembler::tlab_refill(Label& retry, + Label& try_eden, + Label& slow_case) { + Register top = r0; + Register t1 = r2; + Register t2 = r4; + assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3); + Label do_refill, discard_tlab; + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + b(slow_case); + } + + ldr(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + ldr(t1, Address(rthread, in_bytes(JavaThread::tlab_end_offset()))); + + // calculate amount of free space + sub(t1, t1, top); + lsr(t1, t1, LogHeapWordSize); + + // Retain tlab and allocate object in shared space if + // the amount free in the tlab is too large to discard. 
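The retain-or-discard decision described in the comment above comes down to a comparison against the per-thread refill waste limit. A small illustrative model of that heuristic (names are stand-ins; this is not HotSpot code):

#include <cstddef>

// free_words: words left in the current TLAB; waste_limit: the thread's
// tlab_refill_waste_limit; increment: refill_waste_limit_increment().
// Returns true if the TLAB is kept and this allocation goes to eden.
static bool retain_tlab_model(size_t free_words, size_t& waste_limit, size_t increment) {
  if (free_words <= waste_limit) return false;  // discard_tlab: refill instead
  waste_limit += increment;                     // retain: tolerate a little more waste next time
  return true;                                  // try_eden for this allocation
}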
+ + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); + cmp(t1, rscratch1); + br(Assembler::LE, discard_tlab); + + // Retain + // ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); + mov(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); + add(rscratch1, rscratch1, t2); + str(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); + + if (TLABStats) { + // increment number of slow_allocations + addmw(Address(rthread, in_bytes(JavaThread::tlab_slow_allocations_offset())), + 1, rscratch1); + } + b(try_eden); + + bind(discard_tlab); + if (TLABStats) { + // increment number of refills + addmw(Address(rthread, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1, + rscratch1); + // accumulate wastage -- t1 is amount free in tlab + addmw(Address(rthread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1, + rscratch1); + } + + // if tlab is currently allocated (top or end != null) then + // fill [top, end + alignment_reserve) with array object + cbz(top, do_refill); + + // set up the mark word + mov(rscratch1, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); + str(rscratch1, Address(top, oopDesc::mark_offset_in_bytes())); + // set the length to the remaining space + sub(t1, t1, typeArrayOopDesc::header_size(T_INT)); + add(t1, t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); + lsl(t1, t1, log2_intptr(HeapWordSize/sizeof(jint))); + strw(t1, Address(top, arrayOopDesc::length_offset_in_bytes())); + // set klass to intArrayKlass + { + unsigned long offset; + // dubious reloc why not an oop reloc? + adrp(rscratch1, ExternalAddress((address)Universe::intArrayKlassObj_addr()), + offset); + ldr(t1, Address(rscratch1, offset)); + } + // store klass last. concurrent gcs assumes klass length is valid if + // klass field is not null. + store_klass(top, t1); + + mov(t1, top); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset()))); + sub(t1, t1, rscratch1); + incr_allocated_bytes(rthread, t1, 0, rscratch1); + + // refill the tlab with an eden allocation + bind(do_refill); + ldr(t1, Address(rthread, in_bytes(JavaThread::tlab_size_offset()))); + lsl(t1, t1, LogHeapWordSize); + // allocate new tlab, address returned in top + eden_allocate(top, t1, 0, t2, slow_case); + + // Check that t1 was preserved in eden_allocate. 
+#ifdef ASSERT + if (UseTLAB) { + Label ok; + Register tsize = r4; + assert_different_registers(tsize, rthread, t1); + str(tsize, Address(pre(sp, -16))); + ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset()))); + lsl(tsize, tsize, LogHeapWordSize); + cmp(t1, tsize); + br(Assembler::EQ, ok); + STOP("assert(t1 != tlab size)"); + should_not_reach_here(); + + bind(ok); + ldr(tsize, Address(post(sp, 16))); + } +#endif + str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset()))); + str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + add(top, top, t1); + sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); + str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset()))); + verify_tlab(); + b(retry); + + return rthread; // for use by caller +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + b(slow_case); + } else { + Register end = t1; + Register heap_end = rscratch2; + Label retry; + bind(retry); + { + unsigned long offset; + adrp(rscratch1, ExternalAddress((address) Universe::heap()->end_addr()), offset); + ldr(heap_end, Address(rscratch1, offset)); + } + + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + + // Get the current top of the heap + { + unsigned long offset; + adrp(rscratch1, heap_top, offset); + // Use add() here after ARDP, rather than lea(). + // lea() does not generate anything if its offset is zero. + // However, relocs expect to find either an ADD or a load/store + // insn after an ADRP. add() always generates an ADD insn, even + // for add(Rn, Rn, 0). + add(rscratch1, rscratch1, offset); + ldaxr(obj, rscratch1); + } + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes)); + } + + // if end < obj then we wrapped around high memory + cmp(end, obj); + br(Assembler::LO, slow_case); + + cmp(end, heap_end); + br(Assembler::HI, slow_case); + + // If heap_top hasn't been changed by some other thread, update it. + stlxr(rscratch2, end, rscratch1); + cbnzw(rscratch2, retry); + } +} + +void MacroAssembler::verify_tlab() { +#ifdef ASSERT + if (UseTLAB && VerifyOops) { + Label next, ok; + + stp(rscratch2, rscratch1, Address(pre(sp, -16))); + + ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset()))); + cmp(rscratch2, rscratch1); + br(Assembler::HS, next); + STOP("assert(top >= start)"); + should_not_reach_here(); + + bind(next); + ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset()))); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + cmp(rscratch2, rscratch1); + br(Assembler::HS, ok); + STOP("assert(top <= end)"); + should_not_reach_here(); + + bind(ok); + ldp(rscratch2, rscratch1, Address(post(sp, 16))); + } +#endif +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) { + assert_different_registers(tmp, size, rscratch1); + mov(tmp, sp); + // Bang stack for total size given plus shadow page size. 
+ // Bang one page at a time because large size can bang beyond yellow and + // red zones. + Label loop; + mov(rscratch1, os::vm_page_size()); + bind(loop); + lea(tmp, Address(tmp, -os::vm_page_size())); + subsw(size, size, rscratch1); + str(size, Address(tmp)); + br(Assembler::GT, loop); + + // Bang down shadow pages too. + // The -1 because we already subtracted 1 page. + for (int i = 0; i< StackShadowPages-1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + lea(tmp, Address(tmp, -os::vm_page_size())); + str(size, Address(tmp)); + } +} + + +address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) { + unsigned long off; + adrp(r, Address(page, rtype), off); + InstructionMark im(this); + code_section()->relocate(inst_mark(), rtype); + ldrw(zr, Address(r, off)); + return inst_mark(); +} + +address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), rtype); + ldrw(zr, Address(r, 0)); + return inst_mark(); +} + +void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { + relocInfo::relocType rtype = dest.rspec().reloc()->type(); + unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12; + unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12; + unsigned long dest_page = (unsigned long)dest.target() >> 12; + long offset_low = dest_page - low_page; + long offset_high = dest_page - high_page; + + assert(is_valid_AArch64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "ADRP must be applied to a literal address"); + + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + // 8143067: Ensure that the adrp can reach the dest from anywhere within + // the code cache so that if it is relocated we know it will still reach + if (offset_high >= -(1<<20) && offset_low < (1<<20)) { + _adrp(reg1, dest.target()); + } else { + unsigned long target = (unsigned long)dest.target(); + unsigned long adrp_target + = (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL); + + _adrp(reg1, (address)adrp_target); + movk(reg1, target >> 32, 32); + } + byte_offset = (unsigned long)dest.target() & 0xfff; +} + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; + + if (is_valid_AArch64_address((address)byte_map_base)) { + // Strictly speaking the byte_map_base isn't an address at all, + // and it might even be negative. + unsigned long offset; + adrp(reg, ExternalAddress((address)byte_map_base), offset); + // We expect offset to be zero with most collectors. + if (offset != 0) { + add(reg, reg, offset); + } + } else { + mov(reg, (uint64_t)byte_map_base); + } +} + +void MacroAssembler::build_frame(int framesize) { + if (framesize == 0) { + // Is this even possible? 
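+    // A zero framesize means the frame consists of nothing but the saved
+    // rfp/lr pair, so a single pre-indexed store-pair suffices.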
+    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    sub(sp, sp, framesize);
+    stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+  } else {
+    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+    if (framesize < ((1 << 12) + 2 * wordSize))
+      sub(sp, sp, framesize - 2 * wordSize);
+    else {
+      mov(rscratch1, framesize - 2 * wordSize);
+      sub(sp, sp, rscratch1);
+    }
+  }
+}
+
+void MacroAssembler::remove_frame(int framesize) {
+  if (framesize == 0) {
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+    ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+    add(sp, sp, framesize);
+  } else {
+    if (framesize < ((1 << 12) + 2 * wordSize))
+      add(sp, sp, framesize - 2 * wordSize);
+    else {
+      mov(rscratch1, framesize - 2 * wordSize);
+      add(sp, sp, rscratch1);
+    }
+    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+  }
+}
+
+// Search for str1 in str2 and return index or -1
+void MacroAssembler::string_indexof(Register str2, Register str1,
+                                    Register cnt2, Register cnt1,
+                                    Register tmp1, Register tmp2,
+                                    Register tmp3, Register tmp4,
+                                    int icnt1, Register result) {
+  Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;
+
+  Register ch1 = rscratch1;
+  Register ch2 = rscratch2;
+  Register cnt1tmp = tmp1;
+  Register cnt2tmp = tmp2;
+  Register cnt1_neg = cnt1;
+  Register cnt2_neg = cnt2;
+  Register result_tmp = tmp4;
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (substr.count > string.count) return -1;
+  // if (substr.count == 0) return 0;
+
+// We have two strings, a source string in str2, cnt2 and a pattern string
+// in str1, cnt1. Find the 1st occurrence of pattern in source or return -1.
+
+// For larger pattern and source we use a simplified Boyer Moore algorithm.
+// With a small pattern and source we use linear scan.
+
+  if (icnt1 == -1) {
+    cmp(cnt1, 256);             // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
+    ccmp(cnt1, 8, 0b0000, LO);  // Can't handle skip >= 256 because we use
+    br(LO, LINEARSEARCH);       // a byte array.
+    cmp(cnt1, cnt2, LSR, 2);    // Source must be 4 * pattern for BM
+    br(HS, LINEARSEARCH);
+  }
+
+// The Boyer Moore algorithm is based on the description here:-
+//
+// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
+//
+// This describes an algorithm with 2 shift rules. The 'Bad Character' rule
+// and the 'Good Suffix' rule.
+//
+// These rules are essentially heuristics for how far we can shift the
+// pattern along the search string.
+//
+// The implementation here uses the 'Bad Character' rule only because of the
+// complexity of initialisation for the 'Good Suffix' rule.
+// +// This is also known as the Boyer-Moore-Horspool algorithm:- +// +// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +// +// #define ASIZE 128 +// +// int bm(unsigned char *x, int m, unsigned char *y, int n) { +// int i, j; +// unsigned c; +// unsigned char bc[ASIZE]; +// +// /* Preprocessing */ +// for (i = 0; i < ASIZE; ++i) +// bc[i] = 0; +// for (i = 0; i < m - 1; ) { +// c = x[i]; +// ++i; +// if (c < ASIZE) bc[c] = i; +// } +// +// /* Searching */ +// j = 0; +// while (j <= n - m) { +// c = y[i+j]; +// if (x[m-1] == c) +// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i); +// if (i < 0) return j; +// if (c < ASIZE) +// j = j - bc[y[j+m-1]] + m; +// else +// j += 1; // Advance by 1 only if char >= ASIZE +// } +// } + + if (icnt1 == -1) { + BIND(BM); + + Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP; + Label BMADV, BMMATCH, BMCHECKEND; + + Register cnt1end = tmp2; + Register str2end = cnt2; + Register skipch = tmp2; + + // Restrict ASIZE to 128 to reduce stack space/initialisation. + // The presence of chars >= ASIZE in the target string does not affect + // performance, but we must be careful not to initialise them in the stack + // array. + // The presence of chars >= ASIZE in the source string may adversely affect + // performance since we can only advance by one when we encounter one. + + stp(zr, zr, pre(sp, -128)); + for (int i = 1; i < 8; i++) + stp(zr, zr, Address(sp, i*16)); + + mov(cnt1tmp, 0); + sub(cnt1end, cnt1, 1); + BIND(BCLOOP); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + cmp(ch1, 128); + add(cnt1tmp, cnt1tmp, 1); + br(HS, BCSKIP); + strb(cnt1tmp, Address(sp, ch1)); + BIND(BCSKIP); + cmp(cnt1tmp, cnt1end); + br(LT, BCLOOP); + + mov(result_tmp, str2); + + sub(cnt2, cnt2, cnt1); + add(str2end, str2, cnt2, LSL, 1); + BIND(BMLOOPSTR2); + sub(cnt1tmp, cnt1, 1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, skipch); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(LT, BMMATCH); + BIND(BMLOOPSTR1); + ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1))); + ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1))); + cmp(ch1, ch2); + br(NE, BMSKIP); + subs(cnt1tmp, cnt1tmp, 1); + br(GE, BMLOOPSTR1); + BIND(BMMATCH); + sub(result_tmp, str2, result_tmp); + lsr(result, result_tmp, 1); + add(sp, sp, 128); + b(DONE); + BIND(BMADV); + add(str2, str2, 2); + b(BMCHECKEND); + BIND(BMSKIP); + cmp(skipch, 128); + br(HS, BMADV); + ldrb(ch2, Address(sp, skipch)); + add(str2, str2, cnt1, LSL, 1); + sub(str2, str2, ch2, LSL, 1); + BIND(BMCHECKEND); + cmp(str2, str2end); + br(LE, BMLOOPSTR2); + add(sp, sp, 128); + b(NOMATCH); + } + + BIND(LINEARSEARCH); + { + Label DO1, DO2, DO3; + + Register str2tmp = tmp2; + Register first = tmp3; + + if (icnt1 == -1) + { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD; + + cmp(cnt1, 4); + br(LT, DOSHORT); + + sub(cnt2, cnt2, cnt1); + sub(cnt1, cnt1, 4); + mov(result_tmp, cnt2); + + lea(str1, Address(str1, cnt1, Address::uxtw(1))); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt1_neg, zr, cnt1, LSL, 1); + sub(cnt2_neg, zr, cnt2, LSL, 1); + ldr(first, Address(str1, cnt1_neg)); + + BIND(FIRST_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + adds(cnt1tmp, cnt1_neg, 8); + add(cnt2tmp, cnt2_neg, 8); + br(GE, LAST_WORD); + + BIND(STR1_NEXT); + ldr(ch1, Address(str1, 
cnt1tmp)); + ldr(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + adds(cnt1tmp, cnt1tmp, 8); + add(cnt2tmp, cnt2tmp, 8); + br(LT, STR1_NEXT); + + BIND(LAST_WORD); + ldr(ch1, Address(str1)); + sub(str2tmp, str2, cnt1_neg); // adjust to corresponding + ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2 + cmp(ch1, ch2); + br(NE, STR2_NEXT); + b(MATCH); + + BIND(DOSHORT); + cmp(cnt1, 2); + br(LT, DO1); + br(GT, DO3); + } + + if (icnt1 == 4) { + Label CH1_LOOP; + + ldr(ch1, str1); + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 2) { + Label CH1_LOOP; + + BIND(DO2); + ldrw(ch1, str1); + sub(cnt2, cnt2, 2); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(CH1_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmp(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, CH1_LOOP); + b(NOMATCH); + } + + if (icnt1 == -1 || icnt1 == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + + BIND(DO3); + ldrw(first, str1); + ldrh(ch1, Address(str1, 4)); + + sub(cnt2, cnt2, 3); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + BIND(FIRST_LOOP); + ldrw(ch2, Address(str2, cnt2_neg)); + cmpw(first, ch2); + br(EQ, STR1_LOOP); + BIND(STR2_NEXT); + adds(cnt2_neg, cnt2_neg, 2); + br(LE, FIRST_LOOP); + b(NOMATCH); + + BIND(STR1_LOOP); + add(cnt2tmp, cnt2_neg, 4); + ldrh(ch2, Address(str2, cnt2tmp)); + cmp(ch1, ch2); + br(NE, STR2_NEXT); + b(MATCH); + } + + if (icnt1 == -1 || icnt1 == 1) { + Label CH1_LOOP, HAS_ZERO; + Label DO1_SHORT, DO1_LOOP; + + BIND(DO1); + ldrh(ch1, str1); + cmp(cnt2, 4); + br(LT, DO1_SHORT); + + orr(ch1, ch1, ch1, LSL, 16); + orr(ch1, ch1, ch1, LSL, 32); + + sub(cnt2, cnt2, 4); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + + mov(tmp3, 0x0001000100010001); + BIND(CH1_LOOP); + ldr(ch2, Address(str2, cnt2_neg)); + eor(ch2, ch1, ch2); + sub(tmp1, ch2, tmp3); + orr(tmp2, ch2, 0x7fff7fff7fff7fff); + bics(tmp1, tmp1, tmp2); + br(NE, HAS_ZERO); + adds(cnt2_neg, cnt2_neg, 8); + br(LT, CH1_LOOP); + + cmp(cnt2_neg, 8); + mov(cnt2_neg, 0); + br(LT, CH1_LOOP); + b(NOMATCH); + + BIND(HAS_ZERO); + rev(tmp1, tmp1); + clz(tmp1, tmp1); + add(cnt2_neg, cnt2_neg, tmp1, LSR, 3); + b(MATCH); + + BIND(DO1_SHORT); + mov(result_tmp, cnt2); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2_neg, zr, cnt2, LSL, 1); + BIND(DO1_LOOP); + ldrh(ch2, Address(str2, cnt2_neg)); + cmpw(ch1, ch2); + br(EQ, MATCH); + adds(cnt2_neg, cnt2_neg, 2); + br(LT, DO1_LOOP); + } + } + BIND(NOMATCH); + mov(result, -1); + b(DONE); + BIND(MATCH); + add(result, result_tmp, cnt2_neg, ASR, 1); + BIND(DONE); +} + +// Compare strings. +void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1) { + Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING, + NEXT_WORD, DIFFERENCE; + + BLOCK_COMMENT("string_compare {"); + + // Compute the minimum of the string lengths and save the difference. 
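+  // subsw leaves cnt1 - cnt2 in tmp1 (this signed difference becomes the
+  // result when one string is a prefix of the other) and sets the flags so
+  // that the cselw below selects the smaller count into cnt2.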
+ subsw(tmp1, cnt1, cnt2); + cselw(cnt2, cnt1, cnt2, Assembler::LE); // min + + // A very short string + cmpw(cnt2, 4); + br(Assembler::LT, SHORT_STRING); + + // Check if the strings start at the same location. + cmp(str1, str2); + br(Assembler::EQ, LENGTH_DIFF); + + // Compare longwords + { + subw(cnt2, cnt2, 4); // The last longword is a special case + + // Move both string pointers to the last longword of their + // strings, negate the remaining count, and convert it to bytes. + lea(str1, Address(str1, cnt2, Address::uxtw(1))); + lea(str2, Address(str2, cnt2, Address::uxtw(1))); + sub(cnt2, zr, cnt2, LSL, 1); + + // Loop, loading longwords and comparing them into rscratch2. + bind(NEXT_WORD); + ldr(result, Address(str1, cnt2)); + ldr(cnt1, Address(str2, cnt2)); + adds(cnt2, cnt2, wordSize); + eor(rscratch2, result, cnt1); + cbnz(rscratch2, DIFFERENCE); + br(Assembler::LT, NEXT_WORD); + + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + + ldr(result, Address(str1)); + ldr(cnt1, Address(str2)); + eor(rscratch2, result, cnt1); + cbz(rscratch2, LENGTH_DIFF); + + // Find the first different characters in the longwords and + // compute their difference. + bind(DIFFERENCE); + rev(rscratch2, rscratch2); + clz(rscratch2, rscratch2); + andr(rscratch2, rscratch2, -16); + lsrv(result, result, rscratch2); + uxthw(result, result); + lsrv(cnt1, cnt1, rscratch2); + uxthw(cnt1, cnt1); + subw(result, result, cnt1); + b(DONE); + } + + bind(SHORT_STRING); + // Is the minimum length zero? + cbz(cnt2, LENGTH_DIFF); + + bind(SHORT_LOOP); + load_unsigned_short(result, Address(post(str1, 2))); + load_unsigned_short(cnt1, Address(post(str2, 2))); + subw(result, result, cnt1); + cbnz(result, DONE); + sub(cnt2, cnt2, 1); + cbnz(cnt2, SHORT_LOOP); + + // Strings are equal up to min length. Return the length difference. + bind(LENGTH_DIFF); + mov(result, tmp1); + + // That's it + bind(DONE); + + BLOCK_COMMENT("} string_compare"); +} + + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Count in HeapWords. +// is_large: True when 'cnt' is known to be >= BlockZeroingLowLimit. +void MacroAssembler::zero_words(Register base, Register cnt) +{ + if (UseBlockZeroing) { + block_zero(base, cnt); + } else { + fill_words(base, cnt, zr); + } +} + +// r10 = base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords. 
+// r11 = tmp: For use as cnt if we need to call out +#define ShortArraySize (18 * BytesPerLong) +void MacroAssembler::zero_words(Register base, u_int64_t cnt) +{ + Register tmp = r11; + int i = cnt & 1; // store any odd word to start + if (i) str(zr, Address(base)); + + if (cnt <= ShortArraySize / BytesPerLong) { + for (; i < (int)cnt; i += 2) + stp(zr, zr, Address(base, i * wordSize)); + } else if (UseBlockZeroing && cnt >= (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord)) { + mov(tmp, cnt); + block_zero(base, tmp, true); + } else { + const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll + int remainder = cnt % (2 * unroll); + for (; i < remainder; i += 2) + stp(zr, zr, Address(base, i * wordSize)); + + Label loop; + Register cnt_reg = rscratch1; + Register loop_base = rscratch2; + cnt = cnt - remainder; + mov(cnt_reg, cnt); + // adjust base and prebias by -2 * wordSize so we can pre-increment + add(loop_base, base, (remainder - 2) * wordSize); + bind(loop); + sub(cnt_reg, cnt_reg, 2 * unroll); + for (i = 1; i < unroll; i++) + stp(zr, zr, Address(loop_base, 2 * i * wordSize)); + stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize))); + cbnz(cnt_reg, loop); + } +} + +// base: Address of a buffer to be filled, 8 bytes aligned. +// cnt: Count in 8-byte unit. +// value: Value to be filled with. +// base will point to the end of the buffer after filling. +void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ +// Algorithm: +// +// scratch1 = cnt & 7; +// cnt -= scratch1; +// p += scratch1; +// switch (scratch1) { +// do { +// cnt -= 8; +// p[-8] = v; +// case 7: +// p[-7] = v; +// case 6: +// p[-6] = v; +// // ... +// case 1: +// p[-1] = v; +// case 0: +// p += 8; +// } while (cnt); +// } + + assert_different_registers(base, cnt, value, rscratch1, rscratch2); + + Label fini, skip, entry, loop; + const int unroll = 8; // Number of stp instructions we'll unroll + + cbz(cnt, fini); + tbz(base, 3, skip); + str(value, Address(post(base, 8))); + sub(cnt, cnt, 1); + bind(skip); + + andr(rscratch1, cnt, (unroll-1) * 2); + sub(cnt, cnt, rscratch1); + add(base, base, rscratch1, Assembler::LSL, 3); + adr(rscratch2, entry); + sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 1); + br(rscratch2); + + bind(loop); + add(base, base, unroll * 16); + for (int i = -unroll; i < 0; i++) + stp(value, value, Address(base, i * 16)); + bind(entry); + subs(cnt, cnt, unroll * 2); + br(Assembler::GE, loop); + + tbz(cnt, 0, fini); + str(value, Address(post(base, 8))); + bind(fini); +} + +// Use DC ZVA to do fast zeroing. +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Count in HeapWords. +// is_large: True when 'cnt' is known to be >= BlockZeroingLowLimit. +void MacroAssembler::block_zero(Register base, Register cnt, bool is_large) +{ + Label small; + Label store_pair, loop_store_pair, done; + Label base_aligned; + + assert_different_registers(base, cnt, rscratch1); + guarantee(base == r10 && cnt == r11, "fix register usage"); + + Register tmp = rscratch1; + Register tmp2 = rscratch2; + int zva_length = VM_Version::zva_length(); + + // Ensure ZVA length can be divided by 16. This is required by + // the subsequent operations. + assert (zva_length % 16 == 0, "Unexpected ZVA Length"); + + if (!is_large) cbz(cnt, done); + tbz(base, 3, base_aligned); + str(zr, Address(post(base, 8))); + sub(cnt, cnt, 1); + bind(base_aligned); + + // Ensure count >= zva_length * 2 so that it still deserves a zva after + // alignment. 
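+  // If the caller could not guarantee a large enough count, check it here and
+  // branch to the unrolled stp loop at 'small' when it is below the limit;
+  // otherwise fall through to the zero_longs stub, which uses DC ZVA.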
+ if (!is_large || !(BlockZeroingLowLimit >= zva_length * 2)) { + int low_limit = MAX2(zva_length * 2, (int)BlockZeroingLowLimit); + subs(tmp, cnt, low_limit >> 3); + br(Assembler::LT, small); + } + + far_call(StubRoutines::aarch64::get_zero_longs()); + + bind(small); + + const int unroll = 8; // Number of stp instructions we'll unroll + Label small_loop, small_table_end; + + andr(tmp, cnt, (unroll-1) * 2); + sub(cnt, cnt, tmp); + add(base, base, tmp, Assembler::LSL, 3); + adr(tmp2, small_table_end); + sub(tmp2, tmp2, tmp, Assembler::LSL, 1); + br(tmp2); + + bind(small_loop); + add(base, base, unroll * 16); + for (int i = -unroll; i < 0; i++) + stp(zr, zr, Address(base, i * 16)); + bind(small_table_end); + subs(cnt, cnt, unroll * 2); + br(Assembler::GE, small_loop); + + tbz(cnt, 0, done); + str(zr, Address(post(base, 8))); + + bind(done); +} + +void MacroAssembler::string_equals(Register str1, Register str2, + Register cnt, Register result, + Register tmp1) { + Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING, + NEXT_WORD; + + const Register tmp2 = rscratch1; + assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2); + + BLOCK_COMMENT("string_equals {"); + + // Start by assuming that the strings are not equal. + mov(result, zr); + + // A very short string + cmpw(cnt, 4); + br(Assembler::LT, SHORT_STRING); + + // Check if the strings start at the same location. + cmp(str1, str2); + br(Assembler::EQ, SAME_CHARS); + + // Compare longwords + { + subw(cnt, cnt, 4); // The last longword is a special case + + // Move both string pointers to the last longword of their + // strings, negate the remaining count, and convert it to bytes. + lea(str1, Address(str1, cnt, Address::uxtw(1))); + lea(str2, Address(str2, cnt, Address::uxtw(1))); + sub(cnt, zr, cnt, LSL, 1); + + // Loop, loading longwords and comparing them into rscratch2. + bind(NEXT_WORD); + ldr(tmp1, Address(str1, cnt)); + ldr(tmp2, Address(str2, cnt)); + adds(cnt, cnt, wordSize); + eor(rscratch2, tmp1, tmp2); + cbnz(rscratch2, DONE); + br(Assembler::LT, NEXT_WORD); + + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + + ldr(tmp1, Address(str1)); + ldr(tmp2, Address(str2)); + eor(rscratch2, tmp1, tmp2); + cbz(rscratch2, SAME_CHARS); + b(DONE); + } + + bind(SHORT_STRING); + // Is the length zero? + cbz(cnt, SAME_CHARS); + + bind(SHORT_LOOP); + load_unsigned_short(tmp1, Address(post(str1, 2))); + load_unsigned_short(tmp2, Address(post(str2, 2))); + subw(tmp1, tmp1, tmp2); + cbnz(tmp1, DONE); + sub(cnt, cnt, 1); + cbnz(cnt, SHORT_LOOP); + + // Strings are equal. + bind(SAME_CHARS); + mov(result, true); + + // That's it + bind(DONE); + + BLOCK_COMMENT("} string_equals"); +} + +// Compare char[] arrays aligned to 4 bytes +void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1) +{ + Register cnt1 = rscratch1; + Register cnt2 = rscratch2; + Register tmp2 = rscratch2; + + Label SAME, DIFFER, NEXT, TAIL03, TAIL01; + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + BLOCK_COMMENT("char_arrays_equals {"); + + // different until proven equal + mov(result, false); + + // same array? + cmp(ary1, ary2); + br(Assembler::EQ, SAME); + + // ne if either null + cbz(ary1, DIFFER); + cbz(ary2, DIFFER); + + // lengths ne? 
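+  // (After the length check the NEXT loop below compares four chars -- one
+  // 64-bit word -- per iteration; TAIL03/TAIL01 handle the remaining 0-3.)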
+ ldrw(cnt1, Address(ary1, length_offset)); + ldrw(cnt2, Address(ary2, length_offset)); + cmp(cnt1, cnt2); + br(Assembler::NE, DIFFER); + + lea(ary1, Address(ary1, base_offset)); + lea(ary2, Address(ary2, base_offset)); + + subs(cnt1, cnt1, 4); + br(LT, TAIL03); + + BIND(NEXT); + ldr(tmp1, Address(post(ary1, 8))); + ldr(tmp2, Address(post(ary2, 8))); + subs(cnt1, cnt1, 4); + eor(tmp1, tmp1, tmp2); + cbnz(tmp1, DIFFER); + br(GE, NEXT); + + BIND(TAIL03); // 0-3 chars left, cnt1 = #chars left - 4 + tst(cnt1, 0b10); + br(EQ, TAIL01); + ldrw(tmp1, Address(post(ary1, 4))); + ldrw(tmp2, Address(post(ary2, 4))); + cmp(tmp1, tmp2); + br(NE, DIFFER); + BIND(TAIL01); // 0-1 chars left + tst(cnt1, 0b01); + br(EQ, SAME); + ldrh(tmp1, ary1); + ldrh(tmp2, ary2); + cmp(tmp1, tmp2); + br(NE, DIFFER); + + BIND(SAME); + mov(result, true); + BIND(DIFFER); // result already set + + BLOCK_COMMENT("} char_arrays_equals"); +} + +// encode char[] to byte[] in ISO_8859_1 +void MacroAssembler::encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4) +{ + Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1; + Register tmp1 = rscratch1; + + mov(result, len); // Save initial len + + subs(len, len, 32); + br(LT, LOOP_8); + +// The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions +// to convert chars to bytes. These set the 'QC' bit in the FPSR if +// any char could not fit in a byte, so clear the FPSR so we can test it. + clear_fpsr(); + + BIND(NEXT_32); + ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src); + uqxtn(Vtmp1, T8B, Vtmp1, T8H); // uqxtn - write bottom half + uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half + uqxtn(Vtmp2, T8B, Vtmp3, T8H); + uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2 + get_fpsr(tmp1); + cbnzw(tmp1, LOOP_8); + st1(Vtmp1, Vtmp2, T16B, post(dst, 32)); + subs(len, len, 32); + add(src, src, 64); + br(GE, NEXT_32); + + BIND(LOOP_8); + adds(len, len, 32-8); + br(LT, LOOP_1); + clear_fpsr(); // QC may be set from loop above, clear again + BIND(NEXT_8); + ld1(Vtmp1, T8H, src); + uqxtn(Vtmp1, T8B, Vtmp1, T8H); + get_fpsr(tmp1); + cbnzw(tmp1, LOOP_1); + st1(Vtmp1, T8B, post(dst, 8)); + subs(len, len, 8); + add(src, src, 16); + br(GE, NEXT_8); + + BIND(LOOP_1); + adds(len, len, 8); + br(LE, DONE); + + BIND(NEXT_1); + ldrh(tmp1, Address(post(src, 2))); + tst(tmp1, 0xff00); + br(NE, DONE); + strb(tmp1, Address(post(dst, 1))); + subs(len, len, 1); + br(GT, NEXT_1); + + BIND(DONE); + sub(result, result, len); // Return index where we stopped +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp 2021-01-25 19:31:49.985615726 +0000 @@ -0,0 +1,1297 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP
+
+#include "asm/assembler.hpp"
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+  friend class LIR_Assembler;
+
+ public:
+  using Assembler::mov;
+  using Assembler::movi;
+
+ protected:
+
+  // Support for VM calls
+  //
+  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+#ifdef CC_INTERP
+  // c++ interpreter never wants to use interp_masm version of call_VM
+  #define VIRTUAL
+#else
+  #define VIRTUAL virtual
+#endif
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments,       // the number of arguments to pop after the call
+    Label *retaddr = NULL
+  );
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments,       // the number of arguments to pop after the call
+    Label &retaddr) {
+    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
+  }
+
+  // This is the base routine called by the different versions of call_VM. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+  //
+  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
+  // returns the register which contains the thread upon return. If a thread register has been
+  // specified, the return value will correspond to that register. If no last_java_sp is specified
+  // (noreg) then rsp will be used instead.
+  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
+    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
+    Register java_thread,              // the thread if computed before     ; use noreg otherwise
+    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
+    address  entry_point,              // the entry point
+    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
+    bool     check_exceptions          // whether to check for pending exceptions after return
+  );
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+ virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // Maximum size of class area in Metaspace when compressed + uint64_t use_XOR_for_compressed_class_base; + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) { + use_XOR_for_compressed_class_base + = (operand_valid_for_logical_immediate(false /*is32*/, + (uint64_t)Universe::narrow_klass_base()) + && ((uint64_t)Universe::narrow_klass_base() + > (1u << log2_intptr(CompressedClassSpaceSize)))); + } + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); + + + // Helper functions for statistics gathering. + // Unconditional atomic increment. + void atomic_incw(Register counter_addr, Register tmp, Register tmp2); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) { + lea(tmp1, counter_addr); + atomic_incw(tmp1, tmp2, tmp3); + } + // Load Effective Address + void lea(Register r, const Address &a) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), a.rspec()); + a.lea(this, r); + } + + void addmw(Address a, Register incr, Register scratch) { + ldrw(scratch, a); + addw(scratch, scratch, incr); + strw(scratch, a); + } + + // Add constant to memory word + void addmw(Address a, int imm, Register scratch) { + ldrw(scratch, a); + if (imm > 0) + addw(scratch, scratch, (unsigned)imm); + else + subw(scratch, scratch, (unsigned)-imm); + strw(scratch, a); + } + + // Frame creation and destruction shared between JITs. 
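+  // A typical (illustrative) pairing in a generated prologue/epilogue is:
+  //   __ build_frame(framesize);    // saves rfp/lr, claims framesize bytes
+  //   ... method body ...
+  //   __ remove_frame(framesize);   // exact inverse: releases the stack, restores rfp/lr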
+ void build_frame(int framesize); + void remove_frame(int framesize); + + virtual void _call_Unimplemented(address call_site) { + mov(rscratch2, call_site); + } + +#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + + // aliases defined in AARCH64 spec + + template + inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); } + inline void cmp(Register Rd, unsigned imm) { subs(zr, Rd, imm); } + + inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); } + inline void cmn(Register Rd, unsigned imm) { adds(zr, Rd, imm); } + + void cset(Register Rd, Assembler::Condition cond) { + csinc(Rd, zr, zr, ~cond); + } + void csetw(Register Rd, Assembler::Condition cond) { + csincw(Rd, zr, zr, ~cond); + } + + void cneg(Register Rd, Register Rn, Assembler::Condition cond) { + csneg(Rd, Rn, Rn, ~cond); + } + void cnegw(Register Rd, Register Rn, Assembler::Condition cond) { + csnegw(Rd, Rn, Rn, ~cond); + } + + inline void movw(Register Rd, Register Rn) { + if (Rd == sp || Rn == sp) { + addw(Rd, Rn, 0U); + } else { + orrw(Rd, zr, Rn); + } + } + inline void mov(Register Rd, Register Rn) { + assert(Rd != r31_sp && Rn != r31_sp, "should be"); + if (Rd == Rn) { + } else if (Rd == sp || Rn == sp) { + add(Rd, Rn, 0U); + } else { + orr(Rd, zr, Rn); + } + } + + inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); } + inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); } + + inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); } + inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); } + + inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); } + inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); } + + inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1)); + } + inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) { + bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1)); + } + + inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + bfmw(Rd, Rn, lsb, (lsb + width - 1)); + } + inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) { + bfm(Rd, Rn, lsb , (lsb + width - 1)); + } + + inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1)); + } + inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) { + sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1)); + } + + inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + sbfmw(Rd, Rn, lsb, (lsb + width - 1)); + } + inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) { + sbfm(Rd, Rn, lsb , (lsb + width - 1)); + } + + inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1)); + } + inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) { + ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1)); + } + + inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) { + ubfmw(Rd, Rn, lsb, (lsb + width - 1)); + } + inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) { + ubfm(Rd, Rn, lsb , (lsb + width - 1)); + } + + inline void asrw(Register Rd, Register Rn, unsigned imm) { + sbfmw(Rd, Rn, imm, 31); + } + + inline void asr(Register Rd, Register Rn, unsigned imm) { + sbfm(Rd, Rn, imm, 63); + } + + inline void lslw(Register Rd, Register Rn, unsigned imm) { + ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm)); + } + + inline void 
lsl(Register Rd, Register Rn, unsigned imm) { + ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm)); + } + + inline void lsrw(Register Rd, Register Rn, unsigned imm) { + ubfmw(Rd, Rn, imm, 31); + } + + inline void lsr(Register Rd, Register Rn, unsigned imm) { + ubfm(Rd, Rn, imm, 63); + } + + inline void rorw(Register Rd, Register Rn, unsigned imm) { + extrw(Rd, Rn, Rn, imm); + } + + inline void ror(Register Rd, Register Rn, unsigned imm) { + extr(Rd, Rn, Rn, imm); + } + + inline void sxtbw(Register Rd, Register Rn) { + sbfmw(Rd, Rn, 0, 7); + } + inline void sxthw(Register Rd, Register Rn) { + sbfmw(Rd, Rn, 0, 15); + } + inline void sxtb(Register Rd, Register Rn) { + sbfm(Rd, Rn, 0, 7); + } + inline void sxth(Register Rd, Register Rn) { + sbfm(Rd, Rn, 0, 15); + } + inline void sxtw(Register Rd, Register Rn) { + sbfm(Rd, Rn, 0, 31); + } + + inline void uxtbw(Register Rd, Register Rn) { + ubfmw(Rd, Rn, 0, 7); + } + inline void uxthw(Register Rd, Register Rn) { + ubfmw(Rd, Rn, 0, 15); + } + inline void uxtb(Register Rd, Register Rn) { + ubfm(Rd, Rn, 0, 7); + } + inline void uxth(Register Rd, Register Rn) { + ubfm(Rd, Rn, 0, 15); + } + inline void uxtw(Register Rd, Register Rn) { + ubfm(Rd, Rn, 0, 31); + } + + inline void cmnw(Register Rn, Register Rm) { + addsw(zr, Rn, Rm); + } + inline void cmn(Register Rn, Register Rm) { + adds(zr, Rn, Rm); + } + + inline void cmpw(Register Rn, Register Rm) { + subsw(zr, Rn, Rm); + } + inline void cmp(Register Rn, Register Rm) { + subs(zr, Rn, Rm); + } + + inline void negw(Register Rd, Register Rn) { + subw(Rd, zr, Rn); + } + + inline void neg(Register Rd, Register Rn) { + sub(Rd, zr, Rn); + } + + inline void negsw(Register Rd, Register Rn) { + subsw(Rd, zr, Rn); + } + + inline void negs(Register Rd, Register Rn) { + subs(Rd, zr, Rn); + } + + inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) { + addsw(zr, Rn, Rm, kind, shift); + } + inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) { + adds(zr, Rn, Rm, kind, shift); + } + + inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) { + subsw(zr, Rn, Rm, kind, shift); + } + inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) { + subs(zr, Rn, Rm, kind, shift); + } + + inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) { + subw(Rd, zr, Rn, kind, shift); + } + + inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) { + sub(Rd, zr, Rn, kind, shift); + } + + inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) { + subsw(Rd, zr, Rn, kind, shift); + } + + inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) { + subs(Rd, zr, Rn, kind, shift); + } + + inline void mnegw(Register Rd, Register Rn, Register Rm) { + msubw(Rd, Rn, Rm, zr); + } + inline void mneg(Register Rd, Register Rn, Register Rm) { + msub(Rd, Rn, Rm, zr); + } + + inline void mulw(Register Rd, Register Rn, Register Rm) { + maddw(Rd, Rn, Rm, zr); + } + inline void mul(Register Rd, Register Rn, Register Rm) { + madd(Rd, Rn, Rm, zr); + } + + inline void smnegl(Register Rd, Register Rn, Register Rm) { + smsubl(Rd, Rn, Rm, zr); + } + inline void smull(Register Rd, Register Rn, Register Rm) { + smaddl(Rd, Rn, Rm, zr); + } + + inline void umnegl(Register Rd, Register Rn, Register Rm) { + umsubl(Rd, Rn, Rm, zr); + } + inline void umull(Register Rd, Register Rn, Register Rm) { + umaddl(Rd, Rn, Rm, zr); + } + +#define 
WRAP(INSN) \ + void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \ + if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_A53MAC) && Ra != zr) \ + nop(); \ + Assembler::INSN(Rd, Rn, Rm, Ra); \ + } + + WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw) + WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl) +#undef WRAP + + // macro assembly operations needed for aarch64 + + // first two private routines for loading 32 bit or 64 bit constants +private: + + void mov_immediate64(Register dst, u_int64_t imm64); + void mov_immediate32(Register dst, u_int32_t imm32); + + int push(unsigned int bitset, Register stack); + int pop(unsigned int bitset, Register stack); + + void mov(Register dst, Address a); + +public: + void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } + void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } + + // Push and pop everything that might be clobbered by a native + // runtime call except rscratch1 and rscratch2. (They are always + // scratch, so we don't have to protect them.) Only save the lower + // 64 bits of each vector register. + void push_call_clobbered_registers(); + void pop_call_clobbered_registers(); + + // now mov instructions for loading absolute addresses and 32 or + // 64 bit integers + + inline void mov(Register dst, address addr) + { + mov_immediate64(dst, (u_int64_t)addr); + } + + inline void mov(Register dst, u_int64_t imm64) + { + mov_immediate64(dst, imm64); + } + + inline void movw(Register dst, u_int32_t imm32) + { + mov_immediate32(dst, imm32); + } + + inline void mov(Register dst, long l) + { + mov(dst, (u_int64_t)l); + } + + inline void mov(Register dst, int i) + { + mov(dst, (long)i); + } + + void mov(Register dst, RegisterOrConstant src) { + if (src.is_register()) + mov(dst, src.as_register()); + else + mov(dst, src.as_constant()); + } + + void movptr(Register r, uintptr_t imm64); + + void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); + + void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { + orr(Vd, T, Vn, Vn); + } + +public: + + // Generalized Test Bit And Branch, including a "far" variety which + // spans more than 32KiB. 
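+  // tbz/tbnz can only reach +/-32KiB, so the far form inverts the condition
+  // and hops over an unconditional branch.  Illustrative expansion of
+  // tbr(EQ, r0, 3, dest, /*far*/true):
+  //   tbnz r0, #3, skip
+  //   b    dest
+  //  skip: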
+ void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) { + assert(cond == EQ || cond == NE, "must be"); + + if (far) + cond = ~cond; + + void (Assembler::* branch)(Register Rt, int bitpos, Label &L); + if (cond == Assembler::EQ) + branch = &Assembler::tbz; + else + branch = &Assembler::tbnz; + + if (far) { + Label L; + (this->*branch)(Rt, bitpos, L); + b(dest); + bind(L); + } else { + (this->*branch)(Rt, bitpos, dest); + } + } + + // macro instructions for accessing and updating floating point + // status register + // + // FPSR : op1 == 011 + // CRn == 0100 + // CRm == 0100 + // op2 == 001 + + inline void get_fpsr(Register reg) + { + mrs(0b11, 0b0100, 0b0100, 0b001, reg); + } + + inline void set_fpsr(Register reg) + { + msr(0b011, 0b0100, 0b0100, 0b001, reg); + } + + inline void clear_fpsr() + { + msr(0b011, 0b0100, 0b0100, 0b001, zr); + } + + // DCZID_EL0: op1 == 011 + // CRn == 0000 + // CRm == 0000 + // op2 == 111 + inline void get_dczid_el0(Register reg) + { + mrs(0b011, 0b0000, 0b0000, 0b111, reg); + } + + // CTR_EL0: op1 == 011 + // CRn == 0000 + // CRm == 0000 + // op2 == 001 + inline void get_ctr_el0(Register reg) + { + mrs(0b011, 0b0000, 0b0000, 0b001, reg); + } + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register ra, Register rb, + bool want_remainder, Register tmp = rscratch1); + int corrected_idivq(Register result, Register ra, Register rb, + bool want_remainder, Register tmp = rscratch1); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + static address target_addr_for_insn(address insn_addr, unsigned insn); + static address target_addr_for_insn(address insn_addr) { + unsigned insn = *(unsigned*)insn_addr; + return target_addr_for_insn(insn_addr, insn); + } + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
+ static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction(address branch, address target) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { + return target_addr_for_insn(branch); + } +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch); +#endif + + static int patch_oop(address insn_addr, address o); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // The following 4 methods return the offset of the appropriate move instruction + + // Support for fast byte/short loading with zero extension (depending on particular CPU) + int load_unsigned_byte(Register dst, Address src); + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) + int load_signed_byte(Register dst, Address src); + int load_signed_short(Register dst, Address src); + + int load_signed_byte32(Register dst, Address src); + int load_signed_short32(Register dst, Address src); + + // Support for sign-extension (hi:lo = extend_sign(lo)) + void extend_sign(Register hi, Register lo); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // Support for inc/dec with optimal instruction selection depending on value + + // x86_64 aliases an unqualified register/address increment and + // decrement to call incrementq and decrementq but also supports + // explicitly sized calls to incrementq/decrementq or + // incrementl/decrementl + + // for aarch64 the proper convention would be to use + // increment/decrement for 64 bit operatons and + // incrementw/decrementw for 32 bit operations. so when porting + // x86_64 code we can leave calls to increment/decrement as is, + // replace incrementq/decrementq with increment/decrement and + // replace incrementl/decrementl with incrementw/decrementw. + + // n.b. increment/decrement calls with an Address destination will + // need to use a scratch register to load the value to be + // incremented. increment/decrement calls which add or subtract a + // constant value greater than 2^12 will need to use a 2nd scratch + // register to hold the constant. so, a register increment/decrement + // may trash rscratch2 and an address increment/decrement trash + // rscratch and rscratch2 + + void decrementw(Address dst, int value = 1); + void decrementw(Register reg, int value = 1); + + void decrement(Register reg, int value = 1); + void decrement(Address dst, int value = 1); + + void incrementw(Address dst, int value = 1); + void incrementw(Register reg, int value = 1); + + void increment(Register reg, int value = 1); + void increment(Address dst, int value = 1); + + + // Alignment + void align(int modulus); + + // Stack frame creation/removal + void enter() + { + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + mov(rfp, sp); + } + void leave() + { + mov(sp, rfp); + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + } + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. 
+ // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + + // These always tightly bind to MacroAssembler::call_VM_base + // bypassing the virtual implementation + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); + + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register scratch); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &last_java_pc, + Register scratch); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch); + + void 
reset_last_Java_frame(Register thread); + + // thread in the default location (rthread) + void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + +#if INCLUDE_ALL_GCS + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // INCLUDE_ALL_GCS + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp); + + void load_heap_oop(Register dst, Address src); + + void load_heap_oop_not_null(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + + // currently unimplemented + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (like NULL) into a Register by giving + // the compiler two choices it can't resolve + + void store_heap_oop(Address dst, void* dummy); + + void encode_heap_oop(Register d, Register s); + void encode_heap_oop(Register r) { encode_heap_oop(r, r); } + void decode_heap_oop(Register d, Register s); + void decode_heap_oop(Register r) { decode_heap_oop(r, r); } + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void set_narrow_oop(Register dst, jobject obj); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + void set_narrow_klass(Register dst, Klass* k); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void push_CPU_state(bool save_vectors = false); + void pop_CPU_state(bool restore_vectors = false) ; + + // Round up to a power of two + void round_to(Register reg, int modulus); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // 
temp register + Label& slow_case // continuation point if fast allocation fails + ); + Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address + void verify_tlab(); + + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + // n.b. x86 allows RegisterOrConstant for vtable_index + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + +// TODO: verify method and klass metadata (compare against vptr?) 
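+  // The _verify_method_ptr/_verify_klass_ptr helpers below are currently
+  // empty placeholders (see the TODO above); the macros keep call sites in
+  // shared code compiling unchanged.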
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + mov(rscratch2, -offset); + str(zr, Address(sp, rscratch2)); + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + // Arithmetics + + void addptr(const Address &dst, int32_t src); + void cmpptr(Register src1, Address src2); + + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void atomic_add(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + + void atomic_xchg(Register prev, Register newv, Register addr); + void atomic_xchgw(Register prev, Register newv, Register addr); + void atomic_xchgal(Register prev, Register newv, Register addr); + void atomic_xchgalw(Register prev, Register newv, Register addr); + + void orptr(Address adr, RegisterOrConstant src) { + ldr(rscratch2, adr); + if (src.is_register()) + orr(rscratch2, rscratch2, src.as_register()); + else + orr(rscratch2, rscratch2, src.as_constant()); + str(rscratch2, adr); + } + + // A generic CAS; success or failure is in the EQ flag. + void cmpxchg(Register addr, Register expected, Register new_val, + enum operand_size size, + bool acquire, bool release, + Register tmp = rscratch1); + + // Calls + + address trampoline_call(Address entry, CodeBuffer *cbuf = NULL); + + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. 
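+  // When the code cache can exceed the +/-128MiB reach of a plain b/bl, these
+  // materialise the target address and branch through tmp instead; see
+  // far_branch_size() below for the resulting 12-byte vs 4-byte encoding.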
+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + + static int far_branch_size() { + if (far_branches()) { + return 3 * 4; // adrp, add, br + } else { + return 4; + } + } + + // Emit the CompiledIC call idiom + address ic_call(address entry); + +public: + + // Data + + void mov_metadata(Register dst, Metadata* obj); + Address allocate_metadata_address(Metadata* obj); + Address constant_oop_address(jobject obj); + + void movoop(Register dst, jobject obj, bool immediate = false); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3); + +#undef VIRTUAL + + // Stack push and pop individual 64 bit registers + void push(Register src); + void pop(Register dst); + + // push all registers onto the stack + void pusha(); + void popa(); + + void repne_scan(Register addr, Register value, Register count, + Register scratch); + void repne_scanw(Register addr, Register value, Register count, + Register scratch); + + typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm); + typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift); + + // If a constant does not fit in an immediate field, generate some + // number of MOV instructions and then perform the operation + void wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm, + add_sub_imm_insn insn1, + add_sub_reg_insn insn2); + // Seperate vsn which sets the flags + void wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm, + add_sub_imm_insn insn1, + add_sub_reg_insn insn2); + +#define WRAP(INSN) \ + void INSN(Register Rd, Register Rn, unsigned imm) { \ + wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm, \ + enum shift_kind kind, unsigned shift = 0) { \ + Assembler::INSN(Rd, Rn, Rm, kind, shift); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm) { \ + Assembler::INSN(Rd, Rn, Rm); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm, \ + ext::operation option, int amount = 0) { \ + Assembler::INSN(Rd, Rn, Rm, option, amount); \ + } + + WRAP(add) WRAP(addw) WRAP(sub) WRAP(subw) + +#undef WRAP +#define WRAP(INSN) \ + void INSN(Register Rd, Register Rn, unsigned imm) { \ + wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm, \ + enum shift_kind kind, unsigned shift = 0) { \ + Assembler::INSN(Rd, Rn, Rm, kind, shift); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm) { \ + Assembler::INSN(Rd, Rn, Rm); \ + } \ + \ + void INSN(Register Rd, Register Rn, Register Rm, \ + ext::operation option, int amount = 0) { \ + Assembler::INSN(Rd, Rn, Rm, option, amount); \ + } + + WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw) + + void add(Register Rd, Register Rn, RegisterOrConstant increment); + void addw(Register Rd, Register Rn, RegisterOrConstant increment); + void sub(Register Rd, Register Rn, RegisterOrConstant decrement); + void subw(Register Rd, Register Rn, RegisterOrConstant decrement); + + void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); + + void tableswitch(Register index, jint lowbound, jint highbound, + Label 
&jumptable, Label &jumptable_end, int stride = 1) { + adr(rscratch1, jumptable); + subsw(rscratch2, index, lowbound); + subsw(zr, rscratch2, highbound - lowbound); + br(Assembler::HS, jumptable_end); + add(rscratch1, rscratch1, rscratch2, + ext::sxtw, exact_log2(stride * Assembler::instruction_size)); + br(rscratch1); + } + + // Form an address from base + offset in Rd. Rd may or may not + // actually be used: you must use the Address that is returned. It + // is up to you to ensure that the shift provided matches the size + // of your data. + Address form_address(Register Rd, Register base, long byte_offset, int shift); + + // Return true iff an address is within the 48-bit AArch64 address + // space. + bool is_valid_AArch64_address(address a) { + return ((uint64_t)a >> 48) == 0; + } + + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + + // Prolog generator routines to support switch between x86 code and + // generated ARM code + + // routine to generate an x86 prolog for a stub function which + // bootstraps into the generated ARM code which directly follows the + // stub + // + + public: + + void ldr_constant(Register dest, const Address &const_addr) { + if (NearCpool) { + ldr(dest, const_addr); + } else { + unsigned long offset; + adrp(dest, InternalAddress(const_addr.target()), offset); + ldr(dest, Address(dest, offset)); + } + } + + address read_polling_page(Register r, address page, relocInfo::relocType rtype); + address read_polling_page(Register r, relocInfo::relocType rtype); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void update_byte_crc32(Register crc, Register val, Register table); + void update_word_crc32(Register crc, Register v, Register tmp, + Register table0, Register table1, Register table2, Register table3, + bool upper = false); + + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1); + void string_equals(Register str1, Register str2, + Register cnt, Register result, + Register tmp1); + void char_arrays_equals(Register ary1, Register ary2, + Register result, Register tmp1); + void fill_words(Register base, Register cnt, Register value); + void zero_words(Register base, u_int64_t cnt); + void zero_words(Register base, Register cnt); + void block_zero(Register base, Register cnt, bool is_large = false); + + void encode_iso_array(Register src, Register dst, + Register len, Register result, + FloatRegister Vtmp1, FloatRegister Vtmp2, + FloatRegister Vtmp3, FloatRegister Vtmp4); + void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int int_cnt1, Register result); +private: + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2); + void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { + add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2); + } + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp7, Register product_hi); +public: + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, 
Register z, + Register zlen, Register tmp1, Register tmp2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register tmp7); + // ISB may be needed because of a safepoint + void maybe_isb() { isb(); } + +private: + // Return the effective address r + (r1 << ext) + offset. + // Uses rscratch2. + Address offsetted_address(Register r, Register r1, Address::extend ext, + int offset, int size); + +private: + // Returns an address on the stack which is reachable with a ldr/str of size + // Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + +public: + void spill(Register Rx, bool is64, int offset) { + if (is64) { + str(Rx, spill_address(8, offset)); + } else { + strw(Rx, spill_address(4, offset)); + } + } + void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + str(Vx, T, spill_address(1 << (int)T, offset)); + } + void unspill(Register Rx, bool is64, int offset) { + if (is64) { + ldr(Rx, spill_address(8, offset)); + } else { + ldrw(Rx, spill_address(4, offset)); + } + } + void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + ldr(Vx, T, spill_address(1 << (int)T, offset)); + } + void spill_copy128(int src_offset, int dst_offset, + Register tmp1=rscratch1, Register tmp2=rscratch2) { + if (src_offset < 512 && (src_offset & 7) == 0 && + dst_offset < 512 && (dst_offset & 7) == 0) { + ldp(tmp1, tmp2, Address(sp, src_offset)); + stp(tmp1, tmp2, Address(sp, dst_offset)); + } else { + unspill(tmp1, true, src_offset); + spill(tmp1, true, dst_offset); + unspill(tmp1, true, src_offset+8); + spill(tmp1, true, dst_offset+8); + } + } +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +struct tableswitch { + Register _reg; + int _insn_index; jint _first_key; jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/macroAssembler_aarch64.inline.hpp 2021-01-25 19:31:50.417620267 +0000 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_INLINE_HPP +#define CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_INLINE_HPP + +#include "asm/assembler.hpp" + +#ifndef PRODUCT + +#endif // ndef PRODUCT + +#endif // CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/metaspaceShared_aarch64.cpp 2021-01-25 19:31:50.874625070 +0000 @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2004, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. +// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no relationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. + +#define __ masm-> + +extern "C" { + void aarch64_prolog(void); +} + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + // Get ready to generate dummy methods. 
+ + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + + Label common_code; + for (int i = 0; i < vtbl_list_size; ++i) { + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); + + // We're called directly from C code. + // Load rscratch1 with a value indicating vtable/offset pair. + // -- bits[ 7..0] (8 bits) which virtual method in table? + // -- bits[12..8] (5 bits) which virtual method table? + __ mov(rscratch1, (i << 8) + j); + __ b(common_code); + } + } + + __ bind(common_code); + + Register tmp0 = r10, tmp1 = r11; // AAPCS64 temporary registers + __ enter(); + __ lsr(tmp0, rscratch1, 8); // isolate vtable identifier. + __ mov(tmp1, (address)vtbl_list); // address of list of vtable pointers. + __ ldr(tmp1, Address(tmp1, tmp0, Address::lsl(LogBytesPerWord))); // get correct vtable pointer. + __ str(tmp1, Address(c_rarg0)); // update vtable pointer in obj. + __ add(rscratch1, tmp1, rscratch1, ext::uxtb, LogBytesPerWord); // address of real method pointer. + __ ldr(rscratch1, Address(rscratch1)); // get real method pointer. + __ blr(rscratch1); // jump to the real method. + __ leave(); + __ ret(lr); + + *mc_top = (char*)__ pc(); +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/methodHandles_aarch64.cpp 2021-01-25 19:31:51.314629695 +0000 @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ldr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //PRODUCT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { + Klass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + KlassHandle klass = SystemDictionary::well_known_klass(klass_id); + Register temp = rscratch2; + Register temp2 = rscratch1; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj); + __ cbz(obj, L_bad); + __ push(RegSet::of(temp, temp2), sp); + __ load_klass(temp, obj); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ br(Assembler::EQ, L_ok); + intptr_t super_check_offset = klass->super_check_offset(); + __ ldr(temp, Address(temp, super_check_offset)); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ br(Assembler::EQ, L_ok); + __ pop(RegSet::of(temp, temp2), sp); + __ bind(L_bad); + __ stop(error_message); + __ BIND(L_ok); + __ pop(RegSet::of(temp, temp2), sp); + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == rmethod, "interpreter calling convention"); + Label L_no_such_method; + __ cbz(rmethod, L_no_such_method); + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + + __ ldrw(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset())); + __ cbzw(rscratch1, run_compiled_code); + __ ldr(rscratch1, Address(method, Method::interpreter_entry_offset())); + __ br(rscratch1); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ldr(rscratch1,Address(method, entry_offset)); + __ br(rscratch1); + __ bind(L_no_such_method); + __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ldr(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ldr(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + __ ldr(rscratch1, __ argument_address(temp2, -1)); + __ cmp(recv, rscratch1); + __ br(Assembler::EQ, L); + __ ldr(r0, __ argument_address(temp2, -1)); + __ hlt(0); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
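[Reviewer aside, not part of the patch] For readers new to the method-handle code, jump_to_lambda_form above resolves its target by chasing MethodHandle.form -> LambdaForm.vmentry -> MemberName.vmtarget. A minimal C++ model of that pointer walk follows; the struct and field names are simplified stand-ins for the java.lang.invoke objects and the injected vmtarget field, not the VM's real layouts.

    // Simplified stand-ins for the java.lang.invoke objects involved.
    struct Method       { const void* from_interpreted_entry; };
    struct MemberName   { Method*     vmtarget; };   // injected Method* field
    struct LambdaForm   { MemberName* vmentry;  };
    struct MethodHandle { LambdaForm* form;     };

    // The chain the generated code loads with load_heap_oop/ldr:
    // MH -> MH.form -> LF.vmentry -> MemberName.vmtarget (a Method*).
    static Method* resolve_lambda_form_invoker(MethodHandle* mh) {
      return mh->form->vmentry->vmtarget;
    }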
+ __ hlt(0); // empty stubs make SG sick + return NULL; + } + + // r13: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // rmethod: Method* + // r3: argument locator (parameter slot count, added to rsp) + // r1: used as temp to hold mh or receiver + // r0, r11: garbage temps, blown away + Register argp = r3; // argument list ptr, live on error paths + Register temp = r0; + Register mh = r1; // MH receiver; dies quickly and is recycled + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ldrb(rscratch1, Address(rmethod, Method::intrinsic_id_offset_in_bytes())); + __ cmp(rscratch1, (int) iid); + __ br(Assembler::EQ, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ hlt(0); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address r3_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ldr(argp, Address(rmethod, Method::const_offset())); + __ load_sized_value(argp, + Address(argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + r3_first_arg_addr = __ argument_address(argp, -1); + } else { + DEBUG_ONLY(argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ldr(mh, r3_first_arg_addr); + DEBUG_ONLY(argp = noreg); + } + + // r3_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ldr(recv = r2, r3_first_arg_addr); + } + DEBUG_ONLY(argp = noreg); + Register rmember = rmethod; // MemberName ptr; incoming method ptr is dead now + __ pop(rmember); // extract last argument + generate_method_handle_dispatch(_masm, iid, recv, rmember, not_for_compiler_entry); + } + + return entry_point; +} + + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = r10; + Register temp2 = r11; + Register temp3 = r14; // r13 is live by this point: it contains the sender SP + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + } + + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rmethod, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! + __ hlt(0); + // __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // r13 - interpreter linkage (if interpreted) ??? FIXME + // r1 ... 
r0 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ldr(rmethod, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ldr(rmethod, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ldr(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmpw(temp2_index, 0U); + __ br(Assembler::GE, L_index_ok); + __ hlt(0); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rmethod); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rindex = rmethod; + __ ldr(rindex, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ cmpw(rindex, 0U); + __ br(Assembler::GE, L); + __ hlt(0); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rindex, rmethod, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // live at this point: rmethod, r13 (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r2_recv be shifted out. + __ verify_method_ptr(rmethod); + jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry); + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. 
+struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } +#endif //PRODUCT --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/methodHandles_aarch64.hpp 2021-01-25 19:31:51.747634246 +0000 @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 120000) +}; + +public: + + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return noreg; + } --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/nativeInst_aarch64.cpp 2021-01-25 19:31:52.209639102 +0000 @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +void NativeCall::verify() { ; } + +address NativeCall::destination() const { + address addr = (address)this; + address destination = instruction_address() + displacement(); + + // Do we use a trampoline stub for this call? + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + int code_size = NativeInstruction::instruction_size; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the constant in the call's trampoline stub. + address trampoline_stub_addr = get_trampoline(); + if (trampoline_stub_addr != NULL) { + assert (! is_NativeCallTrampolineStub_at(dest), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } + + // Patch the call. 
+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { + set_destination(dest); + } else { + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + set_destination(trampoline_stub_addr); + } + + ICache::invalidate_range(addr_call, instruction_size); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address bl_destination + = MacroAssembler::pd_call_destination(call_addr); + if (code->content_contains(bl_destination) && + is_NativeCallTrampolineStub_at(bl_destination)) + return bl_destination; + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + // make sure code pattern is actually mov reg64, imm64 instructions +} + + +intptr_t NativeMovConstReg::data() const { + // das(uint64_t(instruction_address()),2); + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); + if (maybe_cpool_ref(instruction_address())) { + return *(intptr_t*)addr; + } else { + return (intptr_t)addr; + } +} + +void NativeMovConstReg::set_data(intptr_t x) { + if (maybe_cpool_ref(instruction_address())) { + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); + *(intptr_t*)addr = x; + } else { + MacroAssembler::pd_patch_instruction(instruction_address(), (address)x); + ICache::invalidate_range(instruction_address(), instruction_size); + } +}; + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +//------------------------------------------------------------------- + +address NativeMovRegMem::instruction_address() const { return addr_at(instruction_offset); } + +int NativeMovRegMem::offset() const { + address pc = instruction_address(); + unsigned insn = *(unsigned*)pc; + if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) { + address addr = MacroAssembler::target_addr_for_insn(pc); + return *addr; + } else { + return (int)(intptr_t)MacroAssembler::target_addr_for_insn(instruction_address()); + } +} + +void NativeMovRegMem::set_offset(int x) { + address pc = instruction_address(); + unsigned insn = *(unsigned*)pc; + if (maybe_cpool_ref(pc)) { + address addr = MacroAssembler::target_addr_for_insn(pc); + *(long*)addr = x; + } else { + MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x)); + ICache::invalidate_range(instruction_address(), instruction_size); + } +} + +void NativeMovRegMem::verify() { +#ifdef ASSERT + address dest = MacroAssembler::target_addr_for_insn(instruction_address()); +#endif +} + +//-------------------------------------------------------------------------------- + +void NativeJump::verify() { ; } + + +void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { +} + + +address NativeJump::jump_destination() const { + address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + + // return -1 if jump to self + dest = 
(dest == (address) this) ? (address) -1 : dest; + return dest; +} + +void NativeJump::set_jump_destination(address dest) { + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about + if (dest == (address) -1) + dest = instruction_address(); + + MacroAssembler::pd_patch_instruction(instruction_address(), dest); + ICache::invalidate_range(instruction_address(), instruction_size); +}; + +//------------------------------------------------------------------- + +bool NativeInstruction::is_safepoint_poll() { + // a safepoint_poll is implemented in two steps as either + // + // adrp(reg, polling_page); + // ldr(zr, [reg, #offset]); + // + // or + // + // mov(reg, polling_page); + // ldr(zr, [reg, #offset]); + // + // however, we cannot rely on the polling page address load always + // directly preceding the read from the page. C1 does that but C2 + // has to do the load and read as two independent instruction + // generation steps. that's because with a single macro sequence the + // generic C2 code can only add the oop map before the mov/adrp and + // the trap handler expects an oop map to be associated with the + // load. with the load scheuled as a prior step the oop map goes + // where it is needed. + // + // so all we can do here is check that marked instruction is a load + // word to zr + return is_ldrw_to_zr(address(this)); +} + +bool NativeInstruction::is_adrp_at(address instr) { + unsigned insn = *(unsigned*)instr; + return (Instruction_aarch64::extract(insn, 31, 24) & 0b10011111) == 0b10010000; +} + +bool NativeInstruction::is_ldr_literal_at(address instr) { + unsigned insn = *(unsigned*)instr; + return (Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000; +} + +bool NativeInstruction::is_ldrw_to_zr(address instr) { + unsigned insn = *(unsigned*)instr; + return (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && + Instruction_aarch64::extract(insn, 4, 0) == 0b11111); +} + +bool NativeInstruction::is_movz() { + return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b10100101; +} + +bool NativeInstruction::is_movk() { + return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101; +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == 0xd4bbd5a1; // dcps1 #0xdead +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = 0xd4bbd5a1; // dcps1 #0xdead +} + +//------------------------------------------------------------------- + +// MT safe inserting of a jump over a jump or a nop (used by +// nmethod::makeZombie) + +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() + || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), + "Aarch64 cannot replace non-jump with jump"); + + // Patch this nmethod atomically. + if (Assembler::reachable_from_branch_at(verified_entry, dest)) { + ptrdiff_t disp = dest - verified_entry; + guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); + + unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff); + *(unsigned int*)verified_entry = insn; + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie. 
+ NativeIllegalInstruction::insert(verified_entry); + } + + ICache::invalidate_range(verified_entry, instruction_size); +} + +void NativeGeneralJump::verify() { } + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; + + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler a(&cb); + + a.movptr(rscratch1, (uintptr_t)entry); + a.br(rscratch1); + + ICache::invalidate_range(code_pos, instruction_size); +} + +// MT-safe patching of a long jump instruction. +void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + ShouldNotCallThis(); +} + +bool NativeInstruction::is_dtrace_trap() { return false; } + +address NativeCallTrampolineStub::destination(nmethod *nm) const { + return ptr_at(data_offset); +} + +void NativeCallTrampolineStub::set_destination(address new_destination) { + set_ptr_at(data_offset, new_destination); + OrderAccess::release(); +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/nativeInst_aarch64.hpp 2021-01-25 19:31:52.682644073 +0000 @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP +#define CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP + +#include "asm/assembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
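[Reviewer aside, not part of the patch] The instruction tests in this file (is_adrp_at, is_ldrw_to_zr, the BL test in NativeCall below) and patch_verified_entry above all work by pulling bit fields out of a 32-bit instruction word. A self-contained sketch of that idiom for the unconditional-branch family, with helper names of my own; the opcodes (B = 0b000101, BL = 0b100101 in bits 31..26, signed 26-bit word offset) match what the patch emits and tests.

    #include <cstdint>

    // Bits [msb..lsb] of an instruction word (same idea as Instruction_aarch64::extract).
    static uint32_t extract_bits(uint32_t insn, int msb, int lsb) {
      return (insn >> lsb) & ((1u << (msb - lsb + 1)) - 1u);
    }

    static bool is_b(uint32_t insn)  { return extract_bits(insn, 31, 26) == 0b000101; }
    static bool is_bl(uint32_t insn) { return extract_bits(insn, 31, 26) == 0b100101; }

    // Encode "B <to>" placed at 'from'; the offset must be word aligned
    // and within +/-128 MB.
    static uint32_t encode_b(uint64_t from, uint64_t to) {
      int64_t disp = (int64_t)(to - from);
      return (0b000101u << 26) | ((uint32_t)(disp >> 2) & 0x03ffffffu);
    }

    // Recover the byte displacement: sign-extend imm26, then scale by 4.
    // (NativeCall::displacement() does this in one step as (insn << 6) >> 4.)
    static int64_t branch_displacement(uint32_t insn) {
      int32_t imm26 = (int32_t)(insn << 6) >> 6;
      return (int64_t)imm26 * 4;
    }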
+ +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { instruction_size = 4 }; + inline bool is_nop(); + bool is_dtrace_trap(); + inline bool is_illegal(); + inline bool is_return(); + bool is_jump(); + inline bool is_jump_or_nop(); + inline bool is_cond_jump(); + bool is_safepoint_poll(); + inline bool is_mov_literal64(); + bool is_movz(); + bool is_movk(); + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + address ptr_at(int offset) const { return *(address*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } + void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } + void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); + + static bool is_adrp_at(address instr); + static bool is_ldr_literal_at(address instr); + static bool is_ldrw_to_zr(address instr); + + static bool maybe_cpool_ref(address instr) { + return is_adrp_at(instr) || is_ldr_literal_at(instr); + } +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + return (NativeInstruction*)address; +} + +// The natural type of an AArch64 instruction is uint32_t +inline NativeInstruction* nativeInstruction_at(uint32_t *address) { + return (NativeInstruction*)address; +} + +inline NativeCall* nativeCall_at(address address); +// The NativeCall is an abstraction for accessing/manipulating native call imm32/rel32off +// instructions (used to manipulate inline caches, primitive & dll calls, etc.). + +class NativeCall: public NativeInstruction { + public: + enum Aarch64_specific_constants { + instruction_size = 4, + instruction_offset = 0, + displacement_offset = 0, + return_address_offset = 4 + }; + + enum { cache_line_size = BytesPerWord }; // conservative estimate! 
+ address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(return_address_offset); } + int displacement() const { return (int_at(displacement_offset) << 6) >> 4; } + address displacement_address() const { return addr_at(displacement_offset); } + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + void set_destination(address dest) { + int offset = dest - instruction_address(); + unsigned int insn = 0b100101 << 26; + assert((offset & 3) == 0, "should be"); + offset >>= 2; + offset &= (1 << 26) - 1; // mask off insn part + insn |= offset; + set_int_at(displacement_offset, insn); + } + + void verify_alignment() { ; } + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + const uint32_t insn = (*(uint32_t*)instr); + return (insn >> 26) == 0b100101; + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - NativeCall::return_address_offset); + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate BL + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// An interface for accessing/manipulating native mov reg, imm instructions. +// (used to manipulate inlined 64-bit data calls, etc.) +class NativeMovConstReg: public NativeInstruction { + public: + enum Aarch64_specific_constants { + instruction_size = 3 * 4, // movz, movk, movk. See movptr(). + instruction_offset = 0, + displacement_offset = 0, + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { + if (nativeInstruction_at(instruction_address())->is_movz()) + // Assume movz, movk, movk + return addr_at(instruction_size); + else if (is_adrp_at(instruction_address())) + return addr_at(2*4); + else if (is_ldr_literal_at(instruction_address())) + return(addr_at(4)); + assert(false, "Unknown instruction in NativeMovConstReg"); + return NULL; + } + + intptr_t data() const; + void set_data(intptr_t x); + + void flush() { + if (! 
maybe_cpool_ref(instruction_address())) { + ICache::invalidate_range(instruction_address(), instruction_size); + } + } + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// An interface for accessing/manipulating native moves of the form: +// mov[b/w/l/q] [reg + offset], reg (instruction_code_reg2mem) +// mov[b/w/l/q] reg, [reg+offset] (instruction_code_mem2reg +// mov[s/z]x[w/b/q] [reg + offset], reg +// fld_s [reg+offset] +// fld_d [reg+offset] +// fstp_s [reg + offset] +// fstp_d [reg + offset] +// mov_literal64 scratch, ; mov[b/w/l/q] 0(scratch),reg | mov[b/w/l/q] reg,0(scratch) +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. For example: The load_unsigned_byte instruction generates +// an xor reg,reg inst prior to generating the movb instruction. This +// class must skip the xor instruction. 
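[Reviewer aside, not part of the patch] NativeMovConstReg above assumes the three-instruction movz/movk/movk pattern that movptr() emits for a 48-bit address (hence instruction_size = 3 * 4, and the 48-bit is_valid_AArch64_address check earlier in the MacroAssembler header). A minimal sketch of that decomposition in plain C++, with helper names of my own choosing.

    #include <cassert>
    #include <cstdint>

    // The three 16-bit halfwords that
    //   movz dst, #lo
    //   movk dst, #mid, lsl 16
    //   movk dst, #hi,  lsl 32
    // would carry for a 48-bit value.
    struct MovPtrImm { uint16_t lo, mid, hi; };

    static MovPtrImm split_48bit(uint64_t value) {
      assert((value >> 48) == 0 && "movptr pattern only covers a 48-bit address space");
      MovPtrImm imm;
      imm.lo  = (uint16_t)(value);
      imm.mid = (uint16_t)(value >> 16);
      imm.hi  = (uint16_t)(value >> 32);
      return imm;
    }

    // What a patcher reading the three immediates back would reconstruct
    // (after which the I-cache range must be invalidated, as in flush()).
    static uint64_t join_48bit(const MovPtrImm& imm) {
      return (uint64_t)imm.lo | ((uint64_t)imm.mid << 16) | ((uint64_t)imm.hi << 32);
    }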
+ +class NativeMovRegMem: public NativeInstruction { + enum AArch64_specific_constants { + instruction_size = 4, + instruction_offset = 0, + data_offset = 0, + next_instruction_offset = 4 + }; + + public: + // helper + int instruction_start() const; + + address instruction_address() const; + + address next_instruction_address() const; + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) {Unimplemented(); return 0; } +}; + +// An interface for accessing/manipulating native leal instruction of form: +// leal reg, [reg + offset] + +class NativeLoadAddress: public NativeMovRegMem { + static const bool has_rex = true; + static const int rex_size = 1; + public: + + void verify(); + void print (); + + // unit test stuff + static void test() {} +}; + +class NativeJump: public NativeInstruction { + public: + enum AArch64_specific_constants { + instruction_size = 4, + instruction_offset = 0, + data_offset = 0, + next_instruction_offset = 4 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(instruction_size); } + address jump_destination() const; + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + + // Unit testing stuff + static void test() {} + + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry); + static void patch_verified_entry(address entry, address verified_entry, address dest); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); +#ifdef ASSERT + jump->verify(); +#endif + return jump; +} + +class NativeGeneralJump: public NativeJump { +public: + enum AArch64_specific_constants { + instruction_size = 4 * 4, + instruction_offset = 0, + data_offset = 0, + next_instruction_offset = 4 * 4 + }; + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + static void verify(); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativePopReg : public NativeInstruction { + public: + // Insert a pop instruction + static void insert(address code_pos, Register reg); +}; + + +class NativeIllegalInstruction: public NativeInstruction { + public: + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +class NativeReturn: public NativeInstruction { + public: +}; + +// return instruction that does pop values of the stack +class 
NativeReturnX: public NativeInstruction { + public: +}; + +// Simple test vs memory +class NativeTstRegMem: public NativeInstruction { + public: +}; + +inline bool NativeInstruction::is_nop() { + uint32_t insn = *(uint32_t*)addr_at(0); + return insn == 0xd503201f; +} + +inline bool NativeInstruction::is_jump() { + uint32_t insn = *(uint32_t*)addr_at(0); + + if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) { + // Unconditional branch (immediate) + return true; + } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) { + // Conditional branch (immediate) + return true; + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) { + // Compare & branch (immediate) + return true; + } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) { + // Test & branch (immediate) + return true; + } else + return false; +} + +inline bool NativeInstruction::is_jump_or_nop() { + return is_nop() || is_jump(); +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum AArch64_specific_constants { + instruction_size = 4 * 4, + instruction_offset = 0, + data_offset = 2 * 4, + next_instruction_offset = 4 * 4 + }; + + address destination(nmethod *nm = NULL) const; + void set_destination(address new_destination); + ptrdiff_t destination_offset() const; +}; + +inline bool is_NativeCallTrampolineStub_at(address addr) { + // Ensure that the stub is exactly + // ldr xscratch1, L + // br xscratch1 + // L: + uint32_t *i = (uint32_t *)addr; + return i[0] == 0x58000048 && i[1] == 0xd61f0100; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} + +#endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/registerMap_aarch64.hpp 2021-01-25 19:31:53.136648845 +0000 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP +#define CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. 
+ // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_AARCH64_VM_REGISTERMAP_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/register_aarch64.cpp 2021-01-25 19:31:53.604653764 +0000 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_aarch64.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; + +const int ConcreteRegisterImpl::max_fpr + = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1); + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", + "rscratch1", "rscratch2", + "r10", "r11", "r12", "r13", "r14", "r15", "r16", + "r17", "r18", "r19", + "resp", "rdispatch", "rbcp", "r23", "rlocals", "rmonitors", "rcpool", "rheapbase", + "rthread", "rfp", "lr", "sp" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + }; + return is_valid() ? names[encoding()] : "noreg"; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/register_aarch64.hpp 2021-01-25 19:31:54.065658609 +0000 @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_REGISTER_AARCH64_HPP +#define CPU_AARCH64_VM_REGISTER_AARCH64_HPP + +#include "asm/register.hpp" +#include "vm_version_aarch64.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + number_of_byte_registers = 32 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } + const char* name() const; + int encoding_nocheck() const { return (intptr_t)this; } + + // Return the bit which represents this register. This is intended + // to be ORed into a bitmask: for usage see class RegSet below. + unsigned long bit(bool should_set = true) const { return should_set ? 
1 << encoding() : 0; } +}; + +// The integer registers of the aarch64 architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); + +CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31)); +CONSTANT_REGISTER_DECLARATION(Register, zr, (32)); +CONSTANT_REGISTER_DECLARATION(Register, sp, (33)); + +// Used as a filler in instructions where a register field is unused. 
+const Register dummy_reg = r31_sp; + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +// The float registers of the AARCH64 architecture + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, v0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. 
+ + number_of_registers = (2 * RegisterImpl::number_of_registers + + 4 * FloatRegisterImpl::number_of_registers + + 1) // flags + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; +}; + +// A set of registers +class RegSet { + uint32_t _bitset; + + RegSet(uint32_t bitset) : _bitset(bitset) { } + +public: + + RegSet() : _bitset(0) { } + + RegSet(Register r1) : _bitset(r1->bit()) { } + + RegSet operator+(const RegSet aSet) const { + RegSet result(_bitset | aSet._bitset); + return result; + } + + RegSet operator-(const RegSet aSet) const { + RegSet result(_bitset & ~aSet._bitset); + return result; + } + + RegSet &operator+=(const RegSet aSet) { + *this = *this + aSet; + return *this; + } + + static RegSet of(Register r1) { + return RegSet(r1); + } + + static RegSet of(Register r1, Register r2) { + return of(r1) + r2; + } + + static RegSet of(Register r1, Register r2, Register r3) { + return of(r1, r2) + r3; + } + + static RegSet of(Register r1, Register r2, Register r3, Register r4) { + return of(r1, r2, r3) + r4; + } + + static RegSet range(Register start, Register end) { + uint32_t bits = ~0; + bits <<= start->encoding(); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); + + return RegSet(bits); + } + + uint32_t bits() const { return _bitset; } +}; + +#endif // CPU_AARCH64_VM_REGISTER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/register_definitions_aarch64.cpp 2021-01-25 19:31:54.522663413 +0000 @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_aarch64.hpp" +# include "interp_masm_aarch64.hpp" + +REGISTER_DEFINITION(Register, r0); +REGISTER_DEFINITION(Register, r1); +REGISTER_DEFINITION(Register, r2); +REGISTER_DEFINITION(Register, r3); +REGISTER_DEFINITION(Register, r4); +REGISTER_DEFINITION(Register, r5); +REGISTER_DEFINITION(Register, r6); +REGISTER_DEFINITION(Register, r7); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); +REGISTER_DEFINITION(Register, r16); +REGISTER_DEFINITION(Register, r17); +REGISTER_DEFINITION(Register, r18); +REGISTER_DEFINITION(Register, r19); +REGISTER_DEFINITION(Register, r20); +REGISTER_DEFINITION(Register, r21); +REGISTER_DEFINITION(Register, r22); +REGISTER_DEFINITION(Register, r23); +REGISTER_DEFINITION(Register, r24); +REGISTER_DEFINITION(Register, r25); +REGISTER_DEFINITION(Register, r26); +REGISTER_DEFINITION(Register, r27); +REGISTER_DEFINITION(Register, r28); +REGISTER_DEFINITION(Register, r29); +REGISTER_DEFINITION(Register, r30); +REGISTER_DEFINITION(Register, sp); + +REGISTER_DEFINITION(FloatRegister, v0); +REGISTER_DEFINITION(FloatRegister, v1); +REGISTER_DEFINITION(FloatRegister, v2); +REGISTER_DEFINITION(FloatRegister, v3); +REGISTER_DEFINITION(FloatRegister, v4); +REGISTER_DEFINITION(FloatRegister, v5); +REGISTER_DEFINITION(FloatRegister, v6); +REGISTER_DEFINITION(FloatRegister, v7); +REGISTER_DEFINITION(FloatRegister, v8); +REGISTER_DEFINITION(FloatRegister, v9); +REGISTER_DEFINITION(FloatRegister, v10); +REGISTER_DEFINITION(FloatRegister, v11); +REGISTER_DEFINITION(FloatRegister, v12); +REGISTER_DEFINITION(FloatRegister, v13); +REGISTER_DEFINITION(FloatRegister, v14); +REGISTER_DEFINITION(FloatRegister, v15); +REGISTER_DEFINITION(FloatRegister, v16); +REGISTER_DEFINITION(FloatRegister, v17); +REGISTER_DEFINITION(FloatRegister, v18); +REGISTER_DEFINITION(FloatRegister, v19); +REGISTER_DEFINITION(FloatRegister, v20); +REGISTER_DEFINITION(FloatRegister, v21); +REGISTER_DEFINITION(FloatRegister, v22); +REGISTER_DEFINITION(FloatRegister, v23); +REGISTER_DEFINITION(FloatRegister, v24); +REGISTER_DEFINITION(FloatRegister, v25); +REGISTER_DEFINITION(FloatRegister, v26); +REGISTER_DEFINITION(FloatRegister, v27); +REGISTER_DEFINITION(FloatRegister, v28); +REGISTER_DEFINITION(FloatRegister, v29); +REGISTER_DEFINITION(FloatRegister, v30); +REGISTER_DEFINITION(FloatRegister, v31); + +REGISTER_DEFINITION(Register, zr); + +REGISTER_DEFINITION(Register, c_rarg0); +REGISTER_DEFINITION(Register, c_rarg1); +REGISTER_DEFINITION(Register, c_rarg2); +REGISTER_DEFINITION(Register, c_rarg3); +REGISTER_DEFINITION(Register, c_rarg4); +REGISTER_DEFINITION(Register, c_rarg5); +REGISTER_DEFINITION(Register, c_rarg6); +REGISTER_DEFINITION(Register, c_rarg7); + +REGISTER_DEFINITION(FloatRegister, c_farg0); +REGISTER_DEFINITION(FloatRegister, c_farg1); +REGISTER_DEFINITION(FloatRegister, c_farg2); +REGISTER_DEFINITION(FloatRegister, c_farg3); +REGISTER_DEFINITION(FloatRegister, c_farg4); +REGISTER_DEFINITION(FloatRegister, c_farg5); +REGISTER_DEFINITION(FloatRegister, c_farg6); +REGISTER_DEFINITION(FloatRegister, c_farg7); + +REGISTER_DEFINITION(Register, j_rarg0); +REGISTER_DEFINITION(Register, j_rarg1); +REGISTER_DEFINITION(Register, j_rarg2); 
+REGISTER_DEFINITION(Register, j_rarg3); +REGISTER_DEFINITION(Register, j_rarg4); +REGISTER_DEFINITION(Register, j_rarg5); +REGISTER_DEFINITION(Register, j_rarg6); +REGISTER_DEFINITION(Register, j_rarg7); + +REGISTER_DEFINITION(FloatRegister, j_farg0); +REGISTER_DEFINITION(FloatRegister, j_farg1); +REGISTER_DEFINITION(FloatRegister, j_farg2); +REGISTER_DEFINITION(FloatRegister, j_farg3); +REGISTER_DEFINITION(FloatRegister, j_farg4); +REGISTER_DEFINITION(FloatRegister, j_farg5); +REGISTER_DEFINITION(FloatRegister, j_farg6); +REGISTER_DEFINITION(FloatRegister, j_farg7); + +REGISTER_DEFINITION(Register, rscratch1); +REGISTER_DEFINITION(Register, rscratch2); +REGISTER_DEFINITION(Register, esp); +REGISTER_DEFINITION(Register, rdispatch); +REGISTER_DEFINITION(Register, rcpool); +REGISTER_DEFINITION(Register, rmonitors); +REGISTER_DEFINITION(Register, rlocals); +REGISTER_DEFINITION(Register, rmethod); +REGISTER_DEFINITION(Register, rbcp); + +REGISTER_DEFINITION(Register, lr); +REGISTER_DEFINITION(Register, rfp); +REGISTER_DEFINITION(Register, rthread); +REGISTER_DEFINITION(Register, rheapbase); + +REGISTER_DEFINITION(Register, r31_sp); + +// TODO : x86 uses rbp to save SP in method handle code +// we may need to do the same with fp +// REGISTER_DEFINITION(Register, rbp_mh_SP_save) --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/relocInfo_aarch64.cpp 2021-01-25 19:31:55.034668794 +0000 @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1998, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + if (verify_only) + return; + + int bytes; + + switch(type()) { + case relocInfo::oop_type: + { + oop_Relocation *reloc = (oop_Relocation *)this; + if (NativeInstruction::is_ldr_literal_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); + assert(*(address*)constptr == x, "error in oop relocation"); + } else{ + bytes = MacroAssembler::patch_oop(addr(), x); + } + } + break; + default: + bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); + break; + } + ICache::invalidate_range(addr(), bytes); +} + +address Relocation::pd_call_destination(address orig_addr) { + assert(is_call(), "should be a call here"); + if (NativeCall::is_call_at(addr())) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } + } + if (orig_addr != NULL) { + address new_addr = MacroAssembler::pd_call_destination(orig_addr); + // If call is branch to self, don't try to relocate it, just leave it + // as branch to self. This happens during code generation if the code + // buffer expands. It will be relocated to the trampoline above once + // code generation is complete. + new_addr = (new_addr == orig_addr) ? addr() : new_addr; + return new_addr; + } + return MacroAssembler::pd_call_destination(addr()); +} + + +void Relocation::pd_set_call_destination(address x) { + assert(is_call(), "should be a call here"); + if (NativeCall::is_call_at(addr())) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); + return; + } + } + MacroAssembler::pd_patch_instruction(addr(), x); + assert(pd_call_destination(addr()) == x, "fail in reloc"); +} + +address* Relocation::pd_address_in_code() { + return (address*)(addr() + 8); +} + + +address Relocation::pd_get_address_from_code() { + return MacroAssembler::pd_call_destination(addr()); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { + if (NativeInstruction::maybe_cpool_ref(addr())) { + address old_addr = old_addr_for(addr(), src, dest); + MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr)); + } +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { + if (NativeInstruction::maybe_cpool_ref(addr())) { + address old_addr = old_addr_for(addr(), src, dest); + MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr)); + } +} + +void metadata_Relocation::pd_fix_value(address x) { +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/relocInfo_aarch64.hpp 2021-01-25 19:31:55.562674344 +0000 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP +#define CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Intel instructions are byte-aligned. + // FIXME for AARCH64 + offset_unit = 1, + format_width = 2 + }; + +#endif // CPU_AARCH64_VM_RELOCINFO_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/runtime_aarch64.cpp 2021-01-25 19:31:56.120680208 +0000 @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_aarch64.inline.hpp" +#endif + + +// This file should really contain the code for generating the OptoRuntime +// exception_blob. However that code uses SimpleRuntimeFrame which only +// exists in sharedRuntime_x86_64.cpp. When there is a sharedRuntime_.hpp +// file and SimpleRuntimeFrame is able to move there then the exception_blob +// code will move here where it belongs. --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp 2021-01-25 19:31:56.678686073 +0000 @@ -0,0 +1,3004 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_aarch64.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#define __ masm-> + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class SimpleRuntimeFrame { + + public: + + // Most of the runtime stubs have this simple frame layout. + // This class exists to make the layout shared in one place. + // Offsets are for compiler stack slots, which are jints. + enum layout { + // The frame sender code expects that rbp will be in the "natural" place and + // will override any oopMap setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. + // we don't expect any arg reg save area so aarch64 asserts that + // frame::arg_reg_save_area_bytes == 0 + rbp_off = 0, + rbp_off2, + return_off, return_off2, + framesize + }; +}; + +// FIXME -- this is used by C1 +class RegisterSaver { + public: + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + + // Offsets into the register save area + // Used by deoptimization when it is managing result register + // values on its own + + static int r0_offset_in_bytes(void) { return (32 + r0->encoding()) * wordSize; } + static int reg_offset_in_bytes(Register r) { return r0_offset_in_bytes() + r->encoding() * wordSize; } + static int rmethod_offset_in_bytes(void) { return reg_offset_in_bytes(rmethod); } + static int rscratch1_offset_in_bytes(void) { return (32 + rscratch1->encoding()) * wordSize; } + static int v0_offset_in_bytes(void) { return 0; } + static int return_offset_in_bytes(void) { return (32 /* floats*/ + 31 /* gregs*/) * wordSize; } + + // During deoptimization only the result registers need to be restored, + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm); + + // Capture info about frame layout + enum layout { + fpu_state_off = 0, + fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, + // The frame sender code expects that rfp will be in + // the "natural" place and will override any oopMap + // setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. + r0_off = fpu_state_off+FPUStateSizeInWords, + rfp_off = r0_off + 30 * 2, + return_off = rfp_off + 2, // slot for return address + reg_save_size = return_off + 2}; + +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { +#ifdef COMPILER2 + if (save_vectors) { + // Save upper half of vector registers + int vect_words = 32 * 8 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif + + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + + reg_save_size*BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // Save Integer and Float registers. + + __ enter(); + __ push_CPU_state(save_vectors); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + for (int i = 0; i < RegisterImpl::number_of_registers; i++) { + Register r = as_Register(i); + if (r < rheapbase && r != rscratch1 && r != rscratch2) { + int sp_offset = 2 * (i + 32); // SP offsets are in 4-byte words, + // register slots are 8 bytes + // wide, 32 floating-point + // registers + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), + r->as_VMReg()); + } + } + + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = save_vectors ? (4 * i) : (2 * i); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + + return oop_map; +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { +#ifndef COMPILER2 + assert(!restore_vectors, "vectors are generated only by C2"); +#endif + __ pop_CPU_state(restore_vectors); + __ leave(); +} + +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restored to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. 
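+  // Worked example of the layout above, assuming wordSize == 8:
+  //   v0_offset_in_bytes()     == 0                    (FP results at the base)
+  //   r0_offset_in_bytes()     == (32 +  0) * 8 == 256
+  //   return_offset_in_bytes() == (32 + 31) * 8 == 504
+  // so the add below pops round_to(504, 16) == 512 bytes of save area.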
+ + // Restore fp result register + __ ldrd(v0, Address(sp, v0_offset_in_bytes())); + // Restore integer result register + __ ldr(r0, Address(sp, r0_offset_in_bytes())); + + // Pop all of the register save are off the stack + __ add(sp, sp, round_to(return_offset_in_bytes(), 16)); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 8 bytes vector registers are saved by default on AArch64. +bool SharedRuntime::is_wide_vector(int size) { + return size > 8; +} +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +static int reg2offset_in(VMReg r) { + // Account for saved rfp and lr + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, +// which are 64-bit. The OUTPUTS are in 32-bit units. + +// The Java calling convention is a "shifted" version of the C ABI. +// By skipping the first C ABI register we can call non-static jni +// methods with small numbers of arguments without having to shuffle +// the arguments at all. Since we control the java ABI we ought to at +// least get some advantage out of it. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and + // registers. 
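+  // For example (a sketch): a static method taking (long, int, Object, double)
+  // arrives here as
+  //   sig_bt = { T_LONG, T_VOID, T_INT, T_OBJECT, T_DOUBLE, T_VOID }
+  // and the loop below hands out
+  //   T_LONG -> j_rarg0, T_INT -> j_rarg1, T_OBJECT -> j_rarg2, T_DOUBLE -> j_farg0
+  // with the T_VOID halves marked bad and no stack slots needed.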
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); + __ cbz(rscratch1, L); + + __ enter(); + __ push_CPU_state(); + + // VM needs caller's callsite + // VM needs target method + // This needs to be a long call since we will relocate this adapter to + // the codeBuffer and it may not reach + +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + + __ mov(c_rarg0, rmethod); + __ mov(c_rarg1, lr); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); + __ blr(rscratch1); + __ maybe_isb(); + + __ pop_CPU_state(); + // restore sp + __ leave(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + patch_callers_callsite(masm); + + __ bind(skip_fixup); + + int words_pushed = 0; + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. 
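+  // For instance (a sketch, with stackElementSize == wordSize == 8): five
+  // interpreter slots need 5 * 8 == 40 bytes, which the round_to below
+  // grows to 48 so that sp stays 16-byte aligned.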
+ + int extraspace = total_args_passed * Interpreter::stackElementSize; + + __ mov(r13, sp); + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + if (extraspace) + __ sub(sp, sp, extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // offset to start parameters + int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; + int next_off = st_off - Interpreter::stackElementSize; + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. Because we can fit a long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use rscratch1 + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + + extraspace + + words_pushed * wordSize); + if (!r_2->is_valid()) { + // sign extend?? + __ ldrw(rscratch1, Address(sp, ld_off)); + __ str(rscratch1, Address(sp, st_off)); + + } else { + + __ ldr(rscratch1, Address(sp, ld_off)); + + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // ld_off == LSW, ld_off+wordSize == MSW + // st_off == MSW, next_off == LSW + __ str(rscratch1, Address(sp, next_off)); +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov(rscratch1, 0xdeadffffdeadaaaaul); + __ str(rscratch1, Address(sp, st_off)); +#endif /* ASSERT */ + } else { + __ str(rscratch1, Address(sp, st_off)); + } + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + // must be only an int (or less ) so move only 32bits to slot + // why not sign extend?? 
+ __ str(r, Address(sp, st_off)); + } else { + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov(rscratch1, 0xdeadffffdeadaaabul); + __ str(rscratch1, Address(sp, st_off)); +#endif /* ASSERT */ + __ str(r, Address(sp, next_off)); + } else { + __ str(r, Address(sp, st_off)); + } + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + // only a float use just part of the slot + __ strs(r_1->as_FloatRegister(), Address(sp, st_off)); + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk + __ mov(rscratch1, 0xdeadffffdeadaaacul); + __ str(rscratch1, Address(sp, st_off)); +#endif /* ASSERT */ + __ strd(r_1->as_FloatRegister(), Address(sp, next_off)); + } + } + } + + __ mov(esp, sp); // Interp expects args on caller's expression stack + + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset()))); + __ br(rscratch1); +} + + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Note: r13 contains the senderSP on entry. We must preserve it since + // we may do a i2c -> c2i transition if we lose a race where compiled + // code goes non-entrant while we get args ready. + + // In addition we use r13 to locate all the interpreter args because + // we must align the stack to 16 bytes. + + // Adapters are frameless. + + // An i2c adapter is frameless because the *caller* frame, which is + // interpreted, routinely repairs its own esp (from + // interpreter_frame_last_sp), even if a callee has modified the + // stack pointer. It also recalculates and aligns sp. + + // A c2i adapter is frameless because the *callee* frame, which is + // interpreted, routinely repairs its caller's sp (from sender_sp, + // which is set up via the senderSP register). + + // In other words, if *either* the caller or callee is interpreted, we can + // get the stack pointer repaired after a call. + + // This is why c2i and i2c adapters cannot be indefinitely composed. + // In particular, if a c2i adapter were to somehow call an i2c adapter, + // both caller and callee would be compiled methods, and neither would + // clean up the stack pointer changes performed by the two adapters. + // If this happens, control eventually transfers back to the compiled + // caller, but with an uncorrected stack, causing delayed havoc. + + if (VerifyAdapterCalls && + (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { +#if 0 + // So, let's test for cascading c2i/i2c adapters right now. 
+ // assert(Interpreter::contains($return_addr) || + // StubRoutines::contains($return_addr), + // "i2c adapter must return to an interpreter frame"); + __ block_comment("verify_i2c { "); + Label L_ok; + if (Interpreter::code() != NULL) + range_check(masm, rax, r11, + Interpreter::code()->code_start(), Interpreter::code()->code_end(), + L_ok); + if (StubRoutines::code1() != NULL) + range_check(masm, rax, r11, + StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), + L_ok); + if (StubRoutines::code2() != NULL) + range_check(masm, rax, r11, + StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), + L_ok); + const char* msg = "i2c adapter must return to an interpreter frame"; + __ block_comment(msg); + __ stop(msg); + __ bind(L_ok); + __ block_comment("} verify_i2ce "); +#endif + } + + // Cut-out for having no stack args. + int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + if (comp_args_on_stack) { + __ sub(rscratch1, sp, comp_words_on_stack * wordSize); + __ andr(sp, rscratch1, -16); + } + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset()))); + + // Now generate the shuffle code. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), + "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + // + // + // + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to account for return address ) + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + if (!r_2->is_valid()) { + // sign extend??? + __ ldrsw(rscratch2, Address(esp, ld_off)); + __ str(rscratch2, Address(sp, st_off)); + } else { + // + // We are using two optoregs. This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + // + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? + next_off : ld_off; + __ ldr(rscratch2, Address(esp, offset)); + // st_off is LSW (i.e. reg.first()) + __ str(rscratch2, Address(sp, st_off)); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + + const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? 
+ next_off : ld_off; + + // this can be a misaligned move + __ ldr(r, Address(esp, offset)); + } else { + // sign extend and use a full word? + __ ldrw(r, Address(esp, ld_off)); + } + } else { + if (!r_2->is_valid()) { + __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off)); + } else { + __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off)); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + + __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); + + __ br(rscratch1); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + + Label ok; + + Register holder = rscratch2; + Register receiver = j_rarg0; + Register tmp = r10; // A call-clobbered register not used for arg passing + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls + // to the interpreter. The args start out packed in the compiled layout. They + // need to be unpacked into the interpreter layout. This will almost always + // require some stack space. We grow the current (compiled) stack, then repack + // the args. We finally end in a jump to the generic interpreter entry point. + // On exit from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). + + { + __ block_comment("c2i_unverified_entry {"); + __ load_klass(rscratch1, receiver); + __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); + __ cmp(rscratch1, tmp); + __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); + __ br(Assembler::EQ, ok); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + __ bind(ok); + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. 
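+    // Concretely: rscratch1 below is loaded with Method::_code; if it is zero
+    // there is no nmethod yet and we fall into the c2i path via skip_fixup,
+    // otherwise we jump to the ic-miss stub so the call site gets re-patched.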
+ __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); + __ cbz(rscratch1, skip_fixup); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ block_comment("} c2i_unverified_entry"); + } + + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on AArch64"); +// We return the amount of VMRegImpl stack slots we need to reserve for all +// the arguments NOT counting out_preserve_stack_slots. + + static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { + c_farg0, c_farg1, c_farg2, c_farg3, + c_farg4, c_farg5, c_farg6, c_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); +#ifdef _WIN64 + fp_args++; + // Allocate slots for callee to stuff register args the stack. + stk_args += 2; +#endif + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); +#ifdef _WIN64 + fp_args++; + stk_args += 2; +#endif + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_c) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); +#ifdef _WIN64 + int_args++; + // Allocate slots for callee to stuff register args the stack. + stk_args += 2; +#endif + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_c) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); +#ifdef _WIN64 + int_args++; + // Allocate slots for callee to stuff register args the stack. + stk_args += 2; +#endif + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + break; + } + } +#ifdef _WIN64 + // windows abi requires that we always allocate enough stack space + // for 4 64bit registers to be stored down. + if (stk_args < 8) { + stk_args = 8; + } +#endif // _WIN64 + + return stk_args; +} + +// On 64 bit we will store integer like items to the stack as +// 64 bits items (sparc abi) even though java would only store +// 32bits for a parameter. 
On 32bit it will simply be 32 bits +// So this routine will do 32->32 on 32bit and 32->64 on 64bit +static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + // Do we really have to sign extend??? + // __ movslq(src.first()->as_Register(), src.first()->as_Register()); + __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + __ sxtw(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register(); + + // See if oop is NULL if it is we need no handle + + if (src.first()->is_stack()) { + + // Oop is already on the stack as an argument + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ lea(rHandle, Address(rfp, reg2offset_in(src.first()))); + // conditionally move a NULL + __ cmp(rscratch1, zr); + __ csel(rHandle, zr, rHandle, Assembler::EQ); + } else { + + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles and pass a handle if oop is non-NULL + + const Register rOop = src.first()->as_Register(); + int oop_slot; + if (rOop == j_rarg0) + oop_slot = 0; + else if (rOop == j_rarg1) + oop_slot = 1; + else if (rOop == j_rarg2) + oop_slot = 2; + else if (rOop == j_rarg3) + oop_slot = 3; + else if (rOop == j_rarg4) + oop_slot = 4; + else if (rOop == j_rarg5) + oop_slot = 5; + else if (rOop == j_rarg6) + oop_slot = 6; + else { + assert(rOop == j_rarg7, "wrong register"); + oop_slot = 7; + } + + oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + // Store oop in handle area, may be NULL + __ str(rOop, Address(sp, offset)); + if (is_receiver) { + *receiver_offset = offset; + } + + __ cmp(rOop, zr); + __ lea(rHandle, Address(sp, offset)); + // conditionally move a NULL + __ csel(rHandle, zr, rHandle, Assembler::EQ); + } + + // If arg is on the stack then place it otherwise it is already in correct reg. 
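+  // At this point rHandle is either zero (for a NULL oop) or the address of the
+  // stack slot holding the oop, which is what the native code expects for a
+  // jobject argument.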
+ if (dst.first()->is_stack()) { + __ str(rHandle, Address(sp, reg2offset_out(dst.first()))); + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(src.first()->is_stack() && dst.first()->is_stack() || + src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error"); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ ldrw(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ strw(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + ShouldNotReachHere(); + } + } else if (src.first() != dst.first()) { + if (src.is_single_phys_reg() && dst.is_single_phys_reg()) + __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + else + ShouldNotReachHere(); + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + // Do we really have to sign extend??? + // __ movslq(src.first()->as_Register(), src.first()->as_Register()); + __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + __ mov(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(src.first()->is_stack() && dst.first()->is_stack() || + src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error"); + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + ShouldNotReachHere(); + } + } else if (src.first() != dst.first()) { + if (src.is_single_phys_reg() && dst.is_single_phys_reg()) + __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + else + ShouldNotReachHere(); + } +} + + +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ strs(v0, Address(rfp, -wordSize)); + break; + case T_DOUBLE: + __ strd(v0, Address(rfp, -wordSize)); + break; + case T_VOID: break; + default: { + __ str(r0, Address(rfp, -wordSize)); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ ldrs(v0, Address(rfp, -wordSize)); + break; + case T_DOUBLE: + __ ldrd(v0, Address(rfp, -wordSize)); + break; + case T_VOID: break; + default: { + __ ldr(r0, Address(rfp, -wordSize)); + } + } +} +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + } else if (args[i].first()->is_FloatRegister()) { + __ 
strd(args[i].first()->as_FloatRegister(), Address(__ pre(sp, -2 * wordSize))); + } + } + __ push(x, sp); +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + } else { + ; + } + } + __ pop(x, sp); + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + ; + } else if (args[i].first()->is_FloatRegister()) { + __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize))); + } + } +} + + +// Check GC_locker::needs_gc and enter the runtime if it's true. This +// keeps a new JNI critical region from starting until a GC has been +// forced. Save down any oops in registers and describe them in an +// OopMap. +static void check_needs_gc_for_critical_native(MacroAssembler* masm, + int stack_slots, + int total_c_args, + int total_in_args, + int arg_save_area, + OopMapSet* oop_maps, + VMRegPair* in_regs, + BasicType* in_sig_bt) { Unimplemented(); } + +// Unpack an array argument into a pointer to the body and the length +// if the array is non-null, otherwise pass 0 for both. +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } + + +class ComputeMoveOrder: public StackObj { + class MoveOperation: public ResourceObj { + friend class ComputeMoveOrder; + private: + VMRegPair _src; + VMRegPair _dst; + int _src_index; + int _dst_index; + bool _processed; + MoveOperation* _next; + MoveOperation* _prev; + + static int get_id(VMRegPair r) { Unimplemented(); return 0; } + + public: + MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): + _src(src) + , _src_index(src_index) + , _dst(dst) + , _dst_index(dst_index) + , _next(NULL) + , _prev(NULL) + , _processed(false) { Unimplemented(); } + + VMRegPair src() const { Unimplemented(); return _src; } + int src_id() const { Unimplemented(); return 0; } + int src_index() const { Unimplemented(); return 0; } + VMRegPair dst() const { Unimplemented(); return _src; } + void set_dst(int i, VMRegPair dst) { Unimplemented(); } + int dst_index() const { Unimplemented(); return 0; } + int dst_id() const { Unimplemented(); return 0; } + MoveOperation* next() const { Unimplemented(); return 0; } + MoveOperation* prev() const { Unimplemented(); return 0; } + void set_processed() { Unimplemented(); } + bool is_processed() const { Unimplemented(); return 0; } + + // insert + void break_cycle(VMRegPair temp_register) { Unimplemented(); } + + void link(GrowableArray& killer) { Unimplemented(); } + }; + + private: + GrowableArray edges; + + public: + ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, + BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } + + // Collected all the move operations + void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } + + // Walk the edges breaking cycles between moves. 
The result list + // can be walked in order to produce the proper set of loads + GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } +}; + + +static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) { + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest)); + } else { + assert((unsigned)gpargs < 256, "eek!"); + assert((unsigned)fpargs < 32, "eek!"); + __ lea(rscratch1, RuntimeAddress(dest)); + __ blr(rscratch1); + __ maybe_isb(); + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = r19; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = r19; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = r2; // known to be free at this point + __ ldr(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. 
The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions. The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed. Some other
+// parts of JNI setup are skipped, like the tear down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for
+// them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//    call into JVM and possibly unlock the JNI critical
+//    if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                methodHandle method,
+                                                int compile_id,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
+                                                BasicType ret_type) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+
+    // First instruction must be a nop as it may need to be patched on deoptimisation
+    __ nop();
+    gen_special_dispatch(masm,
+                         method,
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
+  // An OopMap for lock (and class if static)
+  OopMapSet *oop_maps = new OopMapSet();
+  intptr_t start = (intptr_t)__ pc();
+
+  // We have received a description of where all the java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // the jni function will expect them.
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require. + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // incoming registers + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
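+    // As a hypothetical example, a critical native whose Java arguments are
+    // (int, long, byte[]) and which receives them all in registers counts
+    // single_slots = 1 and double_slots = 2 (the array becomes a 64-bit
+    // pointer on LP64), so total_save_slots = 2 * 2 + 1 = 5 before alignment.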
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: // specific to LP64 (7145024) + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + ShouldNotReachHere(); + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place (+2) to save return values or temp during shuffling + // + 4 for return address (which we own) and saved rfp + stack_slots += 6; + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset (8 java arg registers) + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + // First thing make an ic check to see if we should even be here + + // We are free to use all registers as temps without saving them and + // restoring them except rfp. rfp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + + const Register ic_reg = rscratch2; + const Register receiver = j_rarg0; + + Label hit; + Label exception_pending; + + assert_different_registers(ic_reg, receiver, rscratch1); + __ verify_oop(receiver); + __ cmp_klass(receiver, ic_reg, rscratch1); + __ br(Assembler::EQ, hit); + + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // Verified entry point must be aligned + __ align(8); + + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a B, BL, NOP, BKPT, + // SVC, HVC, or SMC. Make it a NOP. + __ nop(); + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } else { + Unimplemented(); + } + + // Generate a new frame for the wrapper. 
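+  // enter() stores the {rfp, lr} pair and sets rfp to the new sp, so it already
+  // accounts for two of the words in stack_size; only the rest is allocated here.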
+ __ enter(); + // -2 because return address is already present and so is saved rfp + __ sub(sp, sp, stack_size - 2*wordSize); + + // Frame is now completed as far as size and linkage. + int frame_complete = ((intptr_t)__ pc()) - start; + + // We use r20 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = r20; + + if (is_critical_native) { + check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + + // The Java calling convention is either equal (linux) or denser (win64) than the + // c calling convention. However the because of the jni_env argument the c calling + // convention always has at least one more (and two for static) arguments than Java. + // Therefore if we move the args from java -> c backwards then we will never have + // a register->register conflict and we don't have to build a dependency graph + // and figure out how to break any cycles. + // + + // Record esp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. + // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of rfp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp)); + + + int float_args = 0; + int int_args = 0; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. 
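+  // arg_order holds (java index, C index) pairs. For regular JNI natives the
+  // Java args are visited from last to first, so each move targets a C slot
+  // that no not-yet-moved Java arg still occupies and no temporary is needed.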
+ GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set1(r19->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + int_args++; + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + int_args++; + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + double_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + int_args++; + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, in_regs[i], out_regs[c_arg]); + int_args++; + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + int c_arg = total_c_args - total_in_args; + + // Pre-load a static method's oop into c_rarg1. + if (method->is_static() && !is_critical_native) { + + // load oop into a register + __ movoop(c_rarg1, + JNIHandles::make_local(method->method_holder()->java_mirror()), + /*immediate*/true); + + // Now handlize the static class mirror it's known not-null. 
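+    // Handlizing here just means storing the mirror into its reserved stack
+    // slot, recording that slot in the oopmap, and passing the slot's address
+    // rather than the oop itself.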
+ __ str(c_rarg1, Address(sp, klass_offset)); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(c_rarg1, Address(sp, klass_offset)); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a stack traversal). + // We use the same pc/oopMap repeatedly when we call out + + Label native_return; + __ set_last_Java_frame(sp, noreg, native_return, rscratch1); + + Label dtrace_method_entry, dtrace_method_entry_done; + { + unsigned long offset; + __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); + __ ldrb(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, dtrace_method_entry); + __ bind(dtrace_method_entry_done); + } + + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // Lock a synchronized method + + // Register definitions used by locking and unlocking + + const Register swap_reg = r0; + const Register obj_reg = r19; // Will contain the oop + const Register lock_reg = r13; // Address of compiler lock object (BasicLock) + const Register old_hdr = r13; // value of old header at unlock time + const Register tmp = lr; + + Label slow_path_lock; + Label lock_done; + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ mov(oop_handle_reg, c_rarg1); + + // Get address of the box + + __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // Load the oop from the handle + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } + + // Load (object->mark() | 1) into swap_reg %r0 + __ ldr(rscratch1, Address(obj_reg, 0)); + __ orr(swap_reg, rscratch1, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + + // src -> dest iff dest == r0 else r0 <- dest + { Label here; + __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); + } + + // Hmm should this move to the slow path code area??? + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. 
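+    // For example, assuming a 4K page, 3 - os::vm_page_size() is -4093 == ~0xffc,
+    // so the ands below yields zero exactly when mark - sp is a multiple of 4
+    // in the range [0, 4096).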
+ // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + + __ sub(swap_reg, sp, swap_reg); + __ neg(swap_reg, swap_reg); + __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); + + // Save the test result, for recursive case, the result is zero + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + __ br(Assembler::NE, slow_path_lock); + + // Slow path will re-enter here + + __ bind(lock_done); + } + + + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); + } + + // Now set thread in native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + { + int return_type = 0; + switch (ret_type) { + case T_VOID: break; + return_type = 0; break; + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + case T_BOOLEAN: + case T_LONG: + return_type = 1; break; + case T_ARRAY: + case T_OBJECT: + return_type = 1; break; + case T_FLOAT: + return_type = 2; break; + case T_DOUBLE: + return_type = 3; break; + default: + ShouldNotReachHere(); + } + rt_call(masm, native_func, + int_args + 2, // AArch64 passes up to 8 args in int registers + float_args, // and up to 8 float args + return_type); + } + + __ bind(native_return); + + intptr_t return_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(return_pc - start, map); + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(r0); break; + case T_CHAR : __ ubfx(r0, r0, 0, 16); break; + case T_BYTE : __ sbfx(r0, r0, 0, 8); break; + case T_SHORT : __ sbfx(r0, r0, 0, 16); break; + case T_INT : __ sbfx(r0, r0, 0, 32); break; + case T_DOUBLE : + case T_FLOAT : + // Result is in v0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ mov(rscratch1, _thread_in_native_trans); + + if(os::is_MP()) { + if (UseMembar) { + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + + // Force this write out before the read below + __ dmb(Assembler::SY); + } else { + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
+ __ serialize_memory(rthread, r2); + } + } else { + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + } + + // check for safepoint operation in progress and/or pending suspend requests + Label safepoint_in_progress, safepoint_in_progress_done; + { + assert(SafepointSynchronize::_not_synchronized == 0, "fix this code"); + unsigned long offset; + __ adrp(rscratch1, + ExternalAddress((address)SafepointSynchronize::address_of_state()), + offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, safepoint_in_progress); + __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbnzw(rscratch1, safepoint_in_progress); + __ bind(safepoint_in_progress_done); + } + + // change thread state + Label after_transition; + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + __ bind(after_transition); + + Label reguard; + Label reguard_done; + __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); + __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled); + __ br(Assembler::EQ, reguard); + __ bind(reguard_done); + + // native result if any is live + + // Unlock + Label unlock_done; + Label slow_path_unlock; + if (method->is_synchronized()) { + + // Get locked oop from the handle we passed to jni + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + Label done; + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, done); + } + + // Simple recursive lock? + + __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ cbz(rscratch1, done); + + // Must save r0 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + + // get address of the stack lock + __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ldr(old_hdr, Address(r0, 0)); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); + __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + } + + Label dtrace_method_exit, dtrace_method_exit_done; + { + unsigned long offset; + __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); + __ ldrb(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, dtrace_method_exit); + __ bind(dtrace_method_exit_done); + } + + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + Label done, not_weak; + __ cbz(r0, done); // Use NULL as-is. + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); + __ tbz(r0, 0, not_weak); // Test for jweak tag. + // Resolve jweak. + __ ldr(r0, Address(r0, -JNIHandles::weak_tag_value)); + __ verify_oop(r0); +#if INCLUDE_ALL_GCS + if (UseG1GC) { + __ g1_write_barrier_pre(noreg /* obj */, + r0 /* pre_val */, + rthread /* thread */, + rscratch2 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +#endif // INCLUDE_ALL_GCS + __ b(done); + __ bind(not_weak); + // Resolve (untagged) jobject. 
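+    // A non-weak JNI handle is an untagged pointer to the slot that holds the
+    // oop, so a single load produces the unhandlized result.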
+ __ ldr(r0, Address(r0, 0)); + __ verify_oop(r0); + __ bind(done); + } + + if (!is_critical_native) { + // reset handle block + __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); + __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); + } + + __ leave(); + + if (!is_critical_native) { + // Any exception pending? + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbnz(rscratch1, exception_pending); + } + + // We're done + __ ret(lr); + + // Unexpected paths are out of line and go here + + if (!is_critical_native) { + // forward the exception + __ bind(exception_pending); + + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } + + // Slow path locking & unlocking + if (method->is_synchronized()) { + + __ block_comment("Slow path lock {"); + __ bind(slow_path_lock); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + __ mov(c_rarg0, obj_reg); + __ mov(c_rarg1, lock_reg); + __ mov(c_rarg2, rthread); + + // Not a leaf but we have last_Java_frame setup as we want + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch1, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + + __ block_comment("} Slow path lock"); + + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + + // If we haven't already saved the native result we must save it now as xmm registers + // are still exposed. 
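+    // (On AArch64 the float/double result lives in v0; save_native_result above
+    // spills it to the word just below the saved frame pointer.)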
+ + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + + __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ mov(c_rarg0, obj_reg); + + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + // NOTE that obj_reg == r19 currently + __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 2, 0, 1); + +#ifdef ASSERT + { + Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch1, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + + __ block_comment("} Slow path unlock"); + + } // synchronized + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ b(reguard_done); + + // SLOW PATH safepoint + { + __ block_comment("safepoint {"); + __ bind(safepoint_in_progress); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ mov(c_rarg0, rthread); +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + if (!is_critical_native) { + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); + } else { + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); + } + __ blr(rscratch1); + __ maybe_isb(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic above. + __ b(after_transition); + } + + __ b(safepoint_in_progress_done); + __ block_comment("} safepoint"); + } + + // SLOW PATH dtrace support + { + __ block_comment("dtrace entry {"); + __ bind(dtrace_method_entry); + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
+ + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + __ b(dtrace_method_entry_done); + __ block_comment("} dtrace entry"); + } + + { + __ block_comment("dtrace exit {"); + __ bind(dtrace_method_exit); + save_native_result(masm, ret_type, stack_slots); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, c_rarg1); + restore_native_result(masm, ret_type, stack_slots); + __ b(dtrace_method_exit_done); + __ block_comment("} dtrace exit"); + } + + + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; + +} + + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { Unimplemented(); return 0; } + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + assert(callee_locals >= callee_parameters, + "test and remove; got more parms than locals"); + if (callee_locals < callee_parameters) + return 0; // No adjustment for negative locals + int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; + // diff is counted in stack words + return round_to(diff, 2); +} + + +//------------------------------generate_deopt_blob---------------------------- +void SharedRuntime::generate_deopt_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("deopt_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + int frame_size_in_words; + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return + // address has been pushed on the the stack, and return values are in + // registers. 
+ // If we are doing a normal deopt then we were called from the patched + // nmethod from the point we returned to the nmethod. So the return + // address on the stack is wrong by NativeCall::instruction_size + // We will adjust the value so it looks like we have the original return + // address on the stack (like when we eagerly deoptimized). + // In the case of an exception pending when deoptimizing, we enter + // with a return address on the stack that points after the call we patched + // into the exception handler. We have the following register state from, + // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). + // r0: exception oop + // r19: exception handler + // r3: throwing pc + // So in this case we simply jam r3 into the useless return address and + // the stack looks just like we want. + // + // At this point we need to de-opt. We save the argument return + // registers. We call the first C routine, fetch_unroll_info(). This + // routine captures the return values and returns a structure which + // describes the current frame size and the sizes of all replacement frames. + // The current frame is compiled code and may contain many inlined + // functions, each with their own JVM state. We pop the current frame, then + // push all the new frames. Then we call the C routine unpack_frames() to + // populate these frames. Finally unpack_frames() returns us the new target + // address. Notice that callee-save registers are BLOWN here; they have + // already been captured in the vframeArray at the time the return PC was + // patched. + address start = __ pc(); + Label cont; + + // Prolog for non exception case! + + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + // Normal deoptimization. Save exec mode for unpack_frames. + __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved + __ b(cont); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved + __ b(cont); + + int exception_offset = __ pc() - start; + + // Prolog for exception case + + // all registers are dead at this entry point, except for r0, and + // r3 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + + int exception_in_tls_offset = __ pc() - start; + + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // The return address pushed by save_live_registers will be patched + // later with the throwing pc. The correct value is not available + // now because loading it from memory would destroy registers. + + // NB: The SP at this point must be the SP of the method that is + // being deoptimized. 
Deoptimization assumes that the frame created + // here by save_live_registers is immediately below the method's SP. + // This is a somewhat fragile mechanism. + + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. + __ mov(rcpool, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + + __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ str(r3, Address(rfp, wordSize)); + __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ verify_oop(r0); + + // verify that there is no pending exception + Label no_pending_exception; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + + __ bind(cont); + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. + // + // UnrollBlock* fetch_unroll_info(JavaThread* thread) + + // fetch_unroll_info needs to call last_java_frame(). + + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); +#ifdef ASSERT0 + { Label L; + __ ldr(rscratch1, Address(rthread, + JavaThread::last_Java_fp_offset())); + __ cbz(rscratch1, L); + __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); + __ bind(L); + } +#endif // ASSERT + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); + __ blr(rscratch1); + __ bind(retaddr); + + // Need to have an oopmap that tells fetch_unroll_info where to + // find any register it might need. + oop_maps->add_gc_map(__ pc() - start, map); + + __ reset_last_Java_frame(false); + + // Load UnrollBlock* into rdi + __ mov(r5, r0); + + Label noException; + __ cmpw(rcpool, Deoptimization::Unpack_exception); // Was exception pending? + __ br(Assembler::NE, noException); + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + // QQQ this is useless it was NULL above + __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ str(zr, Address(rthread, JavaThread::exception_oop_offset())); + __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); + + __ verify_oop(r0); + + // Overwrite the result registers with the exception results. + __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes())); + // I think this is useless + // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); + + __ bind(noException); + + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. + RegisterSaver::restore_result_registers(masm); + + // All of the register save area has been popped of the stack. Only the + // return address remains. + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). 
+ // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. + + // Pop deoptimized frame + __ ldrw(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); + __ sub(r2, r2, 2 * wordSize); + __ add(sp, sp, r2); + __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); + // LR should now be the return address to the caller (3) + + // Stack bang to make sure there's enough room for these interpreter frames. + if (UseStackBanging) { + __ ldrw(r19, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(r19, r2); + } + + // Load address of array of frame pcs into r2 + __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Trash the old pc + // __ addptr(sp, wordSize); FIXME ???? + + // Load address of array of frame sizes into r4 + __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + + // Load counter into r3 + __ ldrw(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + const Register sender_sp = r6; + + __ mov(sender_sp, sp); + __ ldrw(r19, Address(r5, + Deoptimization::UnrollBlock:: + caller_adjustment_offset_in_bytes())); + __ sub(sp, sp, r19); + + // Push interpreter frames in a loop + __ mov(rscratch1, (address)badHeapOopVal); // Make a recognizable pattern + __ mov(rscratch2, rscratch1); + Label loop; + __ bind(loop); + __ ldr(r19, Address(__ post(r4, wordSize))); // Load frame size + __ sub(r19, r19, 2*wordSize); // We'll push pc and fp by hand + __ ldr(lr, Address(__ post(r2, wordSize))); // Load pc + __ enter(); // Save old & set new fp + __ sub(sp, sp, r19); // Prolog + // This value is corrected by layout_activation_impl + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable + __ mov(sender_sp, sp); // Pass sender_sp to next frame + __ sub(r3, r3, 1); // Decrement counter + __ cbnz(r3, loop); + + // Re-push self-frame + __ ldr(lr, Address(r2)); + __ enter(); + + // Allocate a full sized register save area. We subtract 2 because + // enter() just pushed 2 words + __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + + // Restore frame locals after moving the frame + __ strd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes())); + __ str(r0, Address(sp, RegisterSaver::r0_offset_in_bytes())); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // + // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + + // Use rfp because the frames look interpreted now + // Don't need the precise return PC here, just precise enough to point into this code blob. 
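+  // the_pc is only used as the key for the oopmap registered below and to let
+  // stack walking recognize this blob; any pc inside the blob would do.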
+ address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); + + __ mov(c_rarg0, rthread); + __ movw(c_rarg1, rcpool); // second arg: exec_mode + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + __ blr(rscratch1); + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, + new OopMap( frame_size_in_words, 0 )); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Collect return values + __ ldrd(v0, Address(sp, RegisterSaver::v0_offset_in_bytes())); + __ ldr(r0, Address(sp, RegisterSaver::r0_offset_in_bytes())); + // I think this is useless (throwing pc?) + // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); + + // Pop self-frame. + __ leave(); // Epilog + + // Jump to interpreter + __ ret(lr); + + // Make sure all code is generated + masm->flush(); + + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +#ifdef COMPILER2 +//------------------------------generate_uncommon_trap_blob-------------------- +void SharedRuntime::generate_uncommon_trap_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + + // Push self-frame. We get here with a return address in LR + // and sp should be 16 byte aligned + // push rfp and retaddr by hand + __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); + // we don't expect an arg reg save area +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + // compiler left unloaded_class_index in j_rarg0 move to where the + // runtime expects it. + if (c_rarg1 != j_rarg0) { + __ movw(c_rarg1, j_rarg0); + } + + // we need to set the past SP to the stack pointer of the stub frame + // and the pc to the address where this runtime call will return + // although actually any pc in this code blob will do). + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + // Thread is in rdi already. + // + // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); + // + // n.b. 2 gp args, 0 fp args, integral return type + + __ mov(c_rarg0, rthread); + __ lea(rscratch1, + RuntimeAddress(CAST_FROM_FN_PTR(address, + Deoptimization::uncommon_trap))); + __ blr(rscratch1); + __ bind(retaddr); + + // Set an oopmap for the call site + OopMapSet* oop_maps = new OopMapSet(); + OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); + + // location of rfp is known implicitly by the frame sender code + + oop_maps->add_gc_map(__ pc() - start, map); + + __ reset_last_Java_frame(false); + + // move UnrollBlock* into r4 + __ mov(r4, r0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). 
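+  // SimpleRuntimeFrame::framesize is counted in 32-bit stack slots (hence the
+  // "% 4 == 0" alignment assert above), so the pop below shifts by
+  // LogBytesPerInt to convert slots to bytes.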
+ + // Pop self-frame. We have no frame, and must rely only on r0 and sp. + __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! + + // Pop deoptimized frame (int) + __ ldrw(r2, Address(r4, + Deoptimization::UnrollBlock:: + size_of_deoptimized_frame_offset_in_bytes())); + __ sub(r2, r2, 2 * wordSize); + __ add(sp, sp, r2); + __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); + // LR should now be the return address to the caller (3) frame + + // Stack bang to make sure there's enough room for these interpreter frames. + if (UseStackBanging) { + __ ldrw(r1, Address(r4, + Deoptimization::UnrollBlock:: + total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(r1, r2); + } + + // Load address of array of frame pcs into r2 (address*) + __ ldr(r2, Address(r4, + Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Load address of array of frame sizes into r5 (intptr_t*) + __ ldr(r5, Address(r4, + Deoptimization::UnrollBlock:: + frame_sizes_offset_in_bytes())); + + // Counter + __ ldrw(r3, Address(r4, + Deoptimization::UnrollBlock:: + number_of_frames_offset_in_bytes())); // (int) + + // Now adjust the caller's stack to make up for the extra locals but + // record the original sp so that we can save it in the skeletal + // interpreter frame and the stack walking of interpreter_sender + // will get the unextended sp value and not the "real" sp value. + + const Register sender_sp = r8; + + __ mov(sender_sp, sp); + __ ldrw(r1, Address(r4, + Deoptimization::UnrollBlock:: + caller_adjustment_offset_in_bytes())); // (int) + __ sub(sp, sp, r1); + + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ldr(r1, Address(r5, 0)); // Load frame size + __ sub(r1, r1, 2 * wordSize); // We'll push pc and rfp by hand + __ ldr(lr, Address(r2, 0)); // Save return address + __ enter(); // and old rfp & set new rfp + __ sub(sp, sp, r1); // Prolog + __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable + // This value is corrected by layout_activation_impl + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ mov(sender_sp, sp); // Pass sender_sp to next frame + __ add(r5, r5, wordSize); // Bump array pointer (sizes) + __ add(r2, r2, wordSize); // Bump array pointer (pcs) + __ subsw(r3, r3, 1); // Decrement counter + __ br(Assembler::GT, loop); + __ ldr(lr, Address(r2, 0)); // save final return address + // Re-push self-frame + __ enter(); // & old rfp & set new rfp + + // Use rfp because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // Thread is in rdi already. + // + // BasicType unpack_frames(JavaThread* thread, int exec_mode); + // + // n.b. 
2 gp args, 0 fp args, integral return type + + // sp should already be aligned + __ mov(c_rarg0, rthread); + __ movw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + __ blr(rscratch1); + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog + + // Jump to interpreter + __ ret(lr); + + // Make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, + SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2 + + +//------------------------------generate_handler_blob------ +// +// Generate a special Compile2Runtime blob that saves all registers, +// and setup oopmap. +// +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // Allocate space for the code. Setup code generation tools. + CodeBuffer buffer("handler_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + address call_pc = NULL; + int frame_size_in_words; + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); + + // Save Integer and Float registers. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselves. + + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + // The return address must always be correct so that frame constructor never + // sees an invalid pc. + + if (!cause_return) { + // overwrite the return address pushed by save_live_registers + __ ldr(c_rarg0, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ str(c_rarg0, Address(rfp, wordSize)); + } + + // Do the call + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(call_ptr)); + __ blr(rscratch1); + __ bind(retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + + oop_maps->add_gc_map( __ pc() - start, map); + + Label noException; + + __ reset_last_Java_frame(false); + + __ maybe_isb(); + __ membar(Assembler::LoadLoad | Assembler::LoadStore); + + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, noException); + + // Exception pending + + RegisterSaver::restore_live_registers(masm); + + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // No exception case + __ bind(noException); + + // Normal exit, restore registers and exit. + RegisterSaver::restore_live_registers(masm, save_vectors); + + __ ret(lr); + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. 
All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + CodeBuffer buffer(name, 1000, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + int frame_complete = __ offset(); + + { + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(destination)); + + __ blr(rscratch1); + __ bind(retaddr); + } + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + + oop_maps->add_gc_map( __ offset() - start, map); + + __ maybe_isb(); + + // r0 contains the address we are going to jump to assuming no exception got installed + + // clear last_Java_sp + __ reset_last_Java_frame(false); + // check for pending exceptions + Label pending; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, pending); + + // get the returned Method* + __ get_vm_result_2(rmethod, rthread); + __ str(rmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(rmethod))); + + // r0 is where we want to jump, overwrite rscratch1 which is saved and scratch + __ str(r0, Address(sp, RegisterSaver::rscratch1_offset_in_bytes())); + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go. + + __ br(rscratch1); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + + __ str(zr, Address(rthread, JavaThread::vm_result_offset())); + + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); +} + + +#ifdef COMPILER2 +// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame +// +//------------------------------generate_exception_blob--------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. +// (see emit_exception_handler in x86_64.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jmp. +// +// Arguments: +// r0: exception oop +// r3: exception pc +// +// Results: +// r0: exception oop +// r3: exception pc in caller or ??? 
+// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// Registers r0, r3, r2, r4, r5, r8-r11 are not callee saved. +// + +void OptoRuntime::generate_exception_blob() { + assert(!OptoRuntime::is_callee_saved_register(R3_num), ""); + assert(!OptoRuntime::is_callee_saved_register(R0_num), ""); + assert(!OptoRuntime::is_callee_saved_register(R2_num), ""); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + // TODO check various assumptions made here + // + // make sure we do so before running this + + address start = __ pc(); + + // push rfp and retaddr by hand + // Exception pc is 'return address' for stack walker + __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); + // there are no callee save registers and we don't expect an + // arg reg save area +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + // + // address OptoRuntime::handle_exception_C(JavaThread* thread) + // + // n.b. 1 gp arg, 0 fp args, integral return type + + // the stack should always be aligned + address the_pc = __ pc(); + __ set_last_Java_frame(sp, noreg, the_pc, rscratch1); + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); + __ blr(rscratch1); + __ maybe_isb(); + + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., + // handle_exception_stub), since they were restored when we got the + // exception. + + OopMapSet* oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + __ reset_last_Java_frame(false); + + // Restore callee-saved registers + + // rfp is an implicitly saved callee saved register (i.e. the calling + // convention will save restore it in prolog/epilog) Other than that + // there are no callee save registers now that adapter frames are gone. + // and we dont' expect an arg reg save area + __ ldp(rfp, r3, Address(__ post(sp, 2 * wordSize))); + + // r0: exception handler + + // Restore SP from BP if the exception PC is a MethodHandle call site. + __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset())); + // n.b. 
Intel uses special register rbp_mh_SP_save here but we will + // just hard wire rfp + __ cmpw(rscratch1, zr); + // the obvious way to conditionally copy rfp to sp if NE + // Label skip; + // __ br(Assembler::EQ, skip); + // __ mov(sp, rfp); + // __ bind(skip); + // same but branchless + __ mov(rscratch1, sp); + __ csel(rscratch1, rfp, rscratch1, Assembler::NE); + __ mov(sp, rscratch1); + + // We have a handler in r0 (could be deopt blob). + __ mov(r8, r0); + + // Get the exception oop + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ldr(r4, Address(rthread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset())); + __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ str(zr, Address(rthread, JavaThread::exception_oop_offset())); + + // r0: exception oop + // r8: exception handler + // r4: exception pc + // Jump to handler + + __ br(r8); + + // Make sure all code is generated + masm->flush(); + + // Set exception blob + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2 --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp 2021-01-25 19:31:57.360693242 +0000 @@ -0,0 +1,4286 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_aarch64.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/top.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#undef __ +#define __ _masm-> +#define TIMES_OOP Address::sxtw(exact_log2(UseCompressedOops ? 
4 : 8)) + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + +#ifdef PRODUCT +#define inc_counter_np(counter) ((void)0) +#else + void inc_counter_np_(int& counter) { + __ lea(rscratch2, ExternalAddress((address)&counter)); + __ ldrw(rscratch1, Address(rscratch2)); + __ addw(rscratch1, rscratch1, 1); + __ strw(rscratch1, Address(rscratch2)); + } +#define inc_counter_np(counter) \ + BLOCK_COMMENT("inc_counter " #counter); \ + inc_counter_np_(counter); +#endif + + // Call stubs are used to call Java from C + // + // Arguments: + // c_rarg0: call wrapper address address + // c_rarg1: result address + // c_rarg2: result type BasicType + // c_rarg3: method Method* + // c_rarg4: (interpreter) entry point address + // c_rarg5: parameters intptr_t* + // c_rarg6: parameter size (in words) int + // c_rarg7: thread Thread* + // + // There is no return from the stub itself as any Java result + // is written to result + // + // we save r30 (lr) as the return PC at the base of the frame and + // link r29 (fp) below it as the frame pointer installing sp (r31) + // into fp. + // + // we save r0-r7, which accounts for all the c arguments. + // + // TODO: strictly do we need to save them all? they are treated as + // volatile by C so could we omit saving the ones we are going to + // place in global registers (thread? method?) or those we only use + // during setup of the Java call? + // + // we don't need to save r8 which C uses as an indirect result location + // return register. + // + // we don't need to save r9-r15 which both C and Java treat as + // volatile + // + // we don't need to save r16-18 because Java does not use them + // + // we save r19-r28 which Java uses as scratch registers and C + // expects to be callee-save + // + // we save the bottom 64 bits of each value stored in v8-v15; it is + // the responsibility of the caller to preserve larger values. + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... 
+ // -27 [ argument word 1 ] + // -26 [ saved v15 ] <--- sp_after_call + // -25 [ saved v14 ] + // -24 [ saved v13 ] + // -23 [ saved v12 ] + // -22 [ saved v11 ] + // -21 [ saved v10 ] + // -20 [ saved v9 ] + // -19 [ saved v8 ] + // -18 [ saved r28 ] + // -17 [ saved r27 ] + // -16 [ saved r26 ] + // -15 [ saved r25 ] + // -14 [ saved r24 ] + // -13 [ saved r23 ] + // -12 [ saved r22 ] + // -11 [ saved r21 ] + // -10 [ saved r20 ] + // -9 [ saved r19 ] + // -8 [ call wrapper (r0) ] + // -7 [ result (r1) ] + // -6 [ result type (r2) ] + // -5 [ method (r3) ] + // -4 [ entry point (r4) ] + // -3 [ parameters (r5) ] + // -2 [ parameter size (r6) ] + // -1 [ thread (r7) ] + // 0 [ saved fp (r29) ] <--- fp == saved sp (r31) + // 1 [ saved lr (r30) ] + + // Call stub stack layout word offsets from fp + enum call_stub_layout { + sp_after_call_off = -26, + + d15_off = -26, + d13_off = -24, + d11_off = -22, + d9_off = -20, + + r28_off = -18, + r26_off = -16, + r24_off = -14, + r22_off = -12, + r20_off = -10, + call_wrapper_off = -8, + result_off = -7, + result_type_off = -6, + method_off = -5, + entry_point_off = -4, + parameter_size_off = -2, + thread_off = -1, + fp_f = 0, + retaddr_off = 1, + }; + + address generate_call_stub(address& return_address) { + assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && + (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, + "adjust this code"); + + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + const Address sp_after_call(rfp, sp_after_call_off * wordSize); + + const Address call_wrapper (rfp, call_wrapper_off * wordSize); + const Address result (rfp, result_off * wordSize); + const Address result_type (rfp, result_type_off * wordSize); + const Address method (rfp, method_off * wordSize); + const Address entry_point (rfp, entry_point_off * wordSize); + const Address parameter_size(rfp, parameter_size_off * wordSize); + + const Address thread (rfp, thread_off * wordSize); + + const Address d15_save (rfp, d15_off * wordSize); + const Address d13_save (rfp, d13_off * wordSize); + const Address d11_save (rfp, d11_off * wordSize); + const Address d9_save (rfp, d9_off * wordSize); + + const Address r28_save (rfp, r28_off * wordSize); + const Address r26_save (rfp, r26_off * wordSize); + const Address r24_save (rfp, r24_off * wordSize); + const Address r22_save (rfp, r22_off * wordSize); + const Address r20_save (rfp, r20_off * wordSize); + + // stub code + + address aarch64_entry = __ pc(); + + // set up frame and move sp to end of save area + __ enter(); + __ sub(sp, rfp, -sp_after_call_off * wordSize); + + // save register parameters and Java scratch/global registers + // n.b. 
we save thread even though it gets installed in + // rthread because we want to sanity check rthread later + __ str(c_rarg7, thread); + __ strw(c_rarg6, parameter_size); + __ stp(c_rarg4, c_rarg5, entry_point); + __ stp(c_rarg2, c_rarg3, result_type); + __ stp(c_rarg0, c_rarg1, call_wrapper); + + __ stp(r20, r19, r20_save); + __ stp(r22, r21, r22_save); + __ stp(r24, r23, r24_save); + __ stp(r26, r25, r26_save); + __ stp(r28, r27, r28_save); + + __ stpd(v9, v8, d9_save); + __ stpd(v11, v10, d11_save); + __ stpd(v13, v12, d13_save); + __ stpd(v15, v14, d15_save); + + // install Java thread in global register now we have saved + // whatever value it held + __ mov(rthread, c_rarg7); + // And method + __ mov(rmethod, c_rarg3); + + // set up the heapbase register + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cmp(rscratch1, (unsigned)NULL_WORD); + __ br(Assembler::EQ, L); + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ BIND(L); + } +#endif + // pass parameters if any + __ mov(esp, sp); + __ sub(rscratch1, sp, c_rarg6, ext::uxtw, LogBytesPerWord); // Move SP out of the way + __ andr(sp, rscratch1, -2 * wordSize); + + BLOCK_COMMENT("pass parameters if any"); + Label parameters_done; + // parameter count is still in c_rarg6 + // and parameter pointer identifying param 1 is in c_rarg5 + __ cbzw(c_rarg6, parameters_done); + + address loop = __ pc(); + __ ldr(rscratch1, Address(__ post(c_rarg5, wordSize))); + __ subsw(c_rarg6, c_rarg6, 1); + __ push(rscratch1); + __ br(Assembler::GT, loop); + + __ BIND(parameters_done); + + // call Java entry -- passing methdoOop, and current sp + // rmethod: Method* + // r13: sender sp + BLOCK_COMMENT("call Java function"); + __ mov(r13, sp); + __ blr(c_rarg4); + + // we do this here because the notify will already have been done + // if we get to the next instruction via an exception + // + // n.b. adding this instruction here affects the calculation of + // whether or not a routine returns to the call stub (used when + // doing stack walks) since the normal test is to check the return + // pc against the address saved below. so we may need to allow for + // this extra instruction in the check. + + // save current address for use by exception handling code + + return_address = __ pc(); + + // store result depending on type (everything that is not + // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // n.b. 
this assumes Java returns an integral result in r0 + // and a floating result in j_farg0 + __ ldr(j_rarg2, result); + Label is_long, is_float, is_double, exit; + __ ldr(j_rarg1, result_type); + __ cmp(j_rarg1, T_OBJECT); + __ br(Assembler::EQ, is_long); + __ cmp(j_rarg1, T_LONG); + __ br(Assembler::EQ, is_long); + __ cmp(j_rarg1, T_FLOAT); + __ br(Assembler::EQ, is_float); + __ cmp(j_rarg1, T_DOUBLE); + __ br(Assembler::EQ, is_double); + + // handle T_INT case + __ strw(r0, Address(j_rarg2)); + + __ BIND(exit); + + // pop parameters + __ sub(esp, rfp, -sp_after_call_off * wordSize); + +#ifdef ASSERT + // verify that threads correspond + { + Label L, S; + __ ldr(rscratch1, thread); + __ cmp(rthread, rscratch1); + __ br(Assembler::NE, S); + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ br(Assembler::EQ, L); + __ BIND(S); + __ stop("StubRoutines::call_stub: threads must correspond"); + __ BIND(L); + } +#endif + + // restore callee-save registers + __ ldpd(v15, v14, d15_save); + __ ldpd(v13, v12, d13_save); + __ ldpd(v11, v10, d11_save); + __ ldpd(v9, v8, d9_save); + + __ ldp(r28, r27, r28_save); + __ ldp(r26, r25, r26_save); + __ ldp(r24, r23, r24_save); + __ ldp(r22, r21, r22_save); + __ ldp(r20, r19, r20_save); + + __ ldp(c_rarg0, c_rarg1, call_wrapper); + __ ldrw(c_rarg2, result_type); + __ ldr(c_rarg3, method); + __ ldp(c_rarg4, c_rarg5, entry_point); + __ ldp(c_rarg6, c_rarg7, parameter_size); + + // leave frame and return to caller + __ leave(); + __ ret(lr); + + // handle return types different from T_INT + + __ BIND(is_long); + __ str(r0, Address(j_rarg2, 0)); + __ br(Assembler::AL, exit); + + __ BIND(is_float); + __ strs(j_farg0, Address(j_rarg2, 0)); + __ br(Assembler::AL, exit); + + __ BIND(is_double); + __ strd(j_farg0, Address(j_rarg2, 0)); + __ br(Assembler::AL, exit); + + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // rsp. 
+ // + // r0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + // same as in generate_call_stub(): + const Address sp_after_call(rfp, sp_after_call_off * wordSize); + const Address thread (rfp, thread_off * wordSize); + +#ifdef ASSERT + // verify that threads correspond + { + Label L, S; + __ ldr(rscratch1, thread); + __ cmp(rthread, rscratch1); + __ br(Assembler::NE, S); + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ br(Assembler::EQ, L); + __ bind(S); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + + // set pending exception + __ verify_oop(r0); + + __ str(r0, Address(rthread, Thread::pending_exception_offset())); + __ mov(rscratch1, (address)__FILE__); + __ str(rscratch1, Address(rthread, Thread::exception_file_offset())); + __ movw(rscratch1, (int)__LINE__); + __ strw(rscratch1, Address(rthread, Thread::exception_line_offset())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, + "_call_stub_return_address must have been generated before"); + __ b(StubRoutines::_call_stub_return_address); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // r0: exception + // r3: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be in LR !! + + // NOTE: this is always used as a jump target within generated code + // so it just needs to be generated code wiht no x86 prolog + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + address start = __ pc(); + + // Upon entry, LR points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // becomes the throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. + +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into r19 + + // call the VM to find the handler address associated with the + // caller address. pass thread in r0 and caller pc (ret address) + // in r1. n.b. the caller pc is in lr, unlike x86 where it is on + // the stack. + __ mov(c_rarg1, lr); + // lr will be trashed by the VM call so we move it to R19 + // (callee-saved) because we also need to pass it to the handler + // returned by this call. + __ mov(r19, lr); + BLOCK_COMMENT("call exception_handler_for_return_address"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, c_rarg1); + // we should not really care that lr is no longer the callee + // address. we saved the value the handler needs in r19 so we can + // just copy it to r3. 
however, the C2 handler will push its own + // frame and then calls into the VM and the VM code asserts that + // the PC for the frame above the handler belongs to a compiled + // Java method. So, we restore lr here to satisfy that assert. + __ mov(lr, r19); + // setup r0 & r3 & clear pending exception + __ mov(r3, r19); + __ mov(r19, r0); + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ str(zr, Address(rthread, Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ cbnz(r0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler + // r0: exception + // r3: throwing pc + // r19: exception handler + __ verify_oop(r0); + __ br(r19); + + return start; + } + + // Non-destructive plausibility checks for oops + // + // Arguments: + // r0: oop to verify + // rscratch1: error message + // + // Stack after saving c_rarg3: + // [tos + 0]: saved c_rarg3 + // [tos + 1]: saved c_rarg2 + // [tos + 2]: saved lr + // [tos + 3]: saved rscratch2 + // [tos + 4]: saved r0 + // [tos + 5]: saved rscratch1 + address generate_verify_oop() { + + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + + Label exit, error; + + // save c_rarg2 and c_rarg3 + __ stp(c_rarg3, c_rarg2, Address(__ pre(sp, -16))); + + // __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ldr(c_rarg3, Address(c_rarg2)); + __ add(c_rarg3, c_rarg3, 1); + __ str(c_rarg3, Address(c_rarg2)); + + // object is in r0 + // make sure object is 'reasonable' + __ cbz(r0, exit); // if obj is NULL it is OK + + // Check if the oop is in the right area of memory + __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, r0, c_rarg3); + __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + + // Compare c_rarg2 and c_rarg3. We don't use a compare + // instruction here because the flags register is live. + __ eor(c_rarg2, c_rarg2, c_rarg3); + __ cbnz(c_rarg2, error); + + // make sure klass is 'reasonable', which is not zero. 
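The mask/bits test just generated reduces to a one-line predicate; a plain C++ sketch follows, with mask and bits standing for whatever Universe::verify_oop_mask() and Universe::verify_oop_bits() return. Like the eor/cbnz pair above, the XOR form avoids a compare, so the condition flags stay untouched.

#include <cstdint>

bool oop_address_plausible(uintptr_t obj, uintptr_t mask, uintptr_t bits) {
  if (obj == 0) return true;                // NULL is accepted (the cbz to exit above)
  return ((obj & mask) ^ bits) == 0;        // same as (obj & mask) == bits
}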
+ __ load_klass(r0, r0); // get klass + __ cbz(r0, error); // if klass is NULL it is broken + + // return if everything seems ok + __ bind(exit); + + __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16))); + __ ret(lr); + + // handle errors + __ bind(error); + __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16))); + + __ push(RegSet::range(r0, r29), sp); + // debug(char* msg, int64_t pc, int64_t regs[]) + __ mov(c_rarg0, rscratch1); // pass address of error message + __ mov(c_rarg1, lr); // pass return address + __ mov(c_rarg2, sp); // pass address of regs on stack +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + BLOCK_COMMENT("call MacroAssembler::debug"); + __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + __ blr(rscratch1); + + return start; + } + + void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); } + + // Generate code for an array write pre barrier + // + // addr - starting address + // count - element count + // tmp - scratch register + // + // Destroy no registers except rscratch1 and rscratch2 + // + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + __ push_call_clobbered_registers(); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ mov(rscratch1, c_rarg0); + __ mov(c_rarg0, c_rarg1); + __ mov(c_rarg1, rscratch1); + } else { + __ mov(c_rarg1, count); + __ mov(c_rarg0, addr); + } + } else { + __ mov(c_rarg0, addr); + __ mov(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); + __ pop_call_clobbered_registers(); + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + + } + } + } + + // + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // end - register containing ending address of destination array + // scratch - scratch register + // + // The input registers are overwritten. + // The ending address is inclusive. + void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) { + assert_different_registers(start, end, scratch); + Label L_done; + + // "end" is inclusive end pointer == start + (count - 1) * array_element_size + // If count == 0, "end" is less than "start" and we need to skip card marking. 
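For the CardTableModRef arm that follows, the shifts and the strb loop amount to dirtying every card spanned by [start, end], end inclusive. A rough C++ sketch, with byte_map_base and card_shift standing in for the CardTableModRefBS values used below (the stub stores zr, so 0 is the dirty value here):

#include <cstdint>

void dirty_cards(volatile uint8_t* byte_map_base, int card_shift,
                 uintptr_t start, uintptr_t end /* inclusive */) {
  uintptr_t first = start >> card_shift;    // card covering the first oop written
  uintptr_t last  = end   >> card_shift;    // card covering the last oop written
  for (uintptr_t c = first; c <= last; c++) {
    byte_map_base[c] = 0;                   // mark the card dirty
  }
}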
+ __ cmp(end, start); + __ br(__ LO, L_done); + + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + + { + __ push_call_clobbered_registers(); + // must compute element count unless barrier set interface is changed (other platforms supply count) + assert_different_registers(start, end, scratch); + __ lea(scratch, Address(end, BytesPerHeapOop)); + __ sub(scratch, scratch, start); // subtract start to get #bytes + __ lsr(scratch, scratch, LogBytesPerHeapOop); // convert to element count + __ mov(c_rarg0, start); + __ mov(c_rarg1, scratch); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); + __ pop_call_clobbered_registers(); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + + __ lsr(start, start, CardTableModRefBS::card_shift); + __ lsr(end, end, CardTableModRefBS::card_shift); + __ sub(end, end, start); // number of bytes to copy + + const Register count = end; // 'end' register contains bytes count now + __ load_byte_map_base(scratch); + __ add(start, start, scratch); + if (UseConcMarkSweepGC) { + __ membar(__ StoreStore); + } + __ BIND(L_loop); + __ strb(zr, Address(start, count)); + __ subs(count, count, 1); + __ br(Assembler::GE, L_loop); + } + break; + default: + ShouldNotReachHere(); + + } + __ bind(L_done); + } + + address generate_zero_longs(Register base, Register cnt) { + Register tmp = rscratch1; + Register tmp2 = rscratch2; + int zva_length = VM_Version::zva_length(); + Label initial_table_end, loop_zva; + Label fini; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "zero_longs"); + address start = __ pc(); + + // Base must be 16 byte aligned. If not just return and let caller handle it + __ tst(base, 0x0f); + __ br(Assembler::NE, fini); + // Align base with ZVA length. + __ neg(tmp, base); + __ andr(tmp, tmp, zva_length - 1); + + // tmp: the number of bytes to be filled to align the base with ZVA length. + __ add(base, base, tmp); + __ sub(cnt, cnt, tmp, Assembler::ASR, 3); + __ adr(tmp2, initial_table_end); + __ sub(tmp2, tmp2, tmp, Assembler::LSR, 2); + __ br(tmp2); + + for (int i = -zva_length + 16; i < 0; i += 16) + __ stp(zr, zr, Address(base, i)); + __ bind(initial_table_end); + + __ sub(cnt, cnt, zva_length >> 3); + __ bind(loop_zva); + __ dc(Assembler::ZVA, base); + __ subs(cnt, cnt, zva_length >> 3); + __ add(base, base, zva_length); + __ br(Assembler::GE, loop_zva); + __ add(cnt, cnt, zva_length >> 3); // count not zeroed by DC ZVA + __ bind(fini); + __ ret(lr); + + return start; + } + + typedef enum { + copy_forwards = 1, + copy_backwards = -1 + } copy_direction; + + // Bulk copy of blocks of 8 words. + // + // count is a count of words. + // + // Precondition: count >= 8 + // + // Postconditions: + // + // The least significant bit of count contains the remaining count + // of words to copy. The rest of count is trash. + // + // s and d are adjusted to point to the remaining words to copy + // + void generate_copy_longs(Label &start, Register s, Register d, Register count, + copy_direction direction) { + int unit = wordSize * direction; + int bias = (UseSIMDForMemoryOps ? 
4:2) * wordSize; + + int offset; + const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6, + t4 = r7, t5 = r10, t6 = r11, t7 = r12; + const Register stride = r13; + + assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7); + assert_different_registers(s, d, count, rscratch1); + + Label again, drain; + const char *stub_name; + if (direction == copy_forwards) + stub_name = "foward_copy_longs"; + else + stub_name = "backward_copy_longs"; + + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", stub_name); + + __ bind(start); + + Label unaligned_copy_long; + if (AvoidUnalignedAccesses) { + __ tbnz(d, 3, unaligned_copy_long); + } + + if (direction == copy_forwards) { + __ sub(s, s, bias); + __ sub(d, d, bias); + } + +#ifdef ASSERT + // Make sure we are never given < 8 words + { + Label L; + __ cmp(count, 8); + __ br(Assembler::GE, L); + __ stop("genrate_copy_longs called with < 8 words"); + __ bind(L); + } +#endif + + // Fill 8 registers + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(s, 4 * unit)); + __ ldpq(v2, v3, Address(__ pre(s, 8 * unit))); + } else { + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } + + __ subs(count, count, 16); + __ br(Assembler::LO, drain); + + int prefetch = PrefetchCopyIntervalInBytes; + bool use_stride = false; + if (direction == copy_backwards) { + use_stride = prefetch > 256; + prefetch = -prefetch; + if (use_stride) __ mov(stride, prefetch); + } + + __ bind(again); + + if (PrefetchCopyIntervalInBytes > 0) + __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP); + + if (UseSIMDForMemoryOps) { + __ stpq(v0, v1, Address(d, 4 * unit)); + __ ldpq(v0, v1, Address(s, 4 * unit)); + __ stpq(v2, v3, Address(__ pre(d, 8 * unit))); + __ ldpq(v2, v3, Address(__ pre(s, 8 * unit))); + } else { + __ stp(t0, t1, Address(d, 2 * unit)); + __ ldp(t0, t1, Address(s, 2 * unit)); + __ stp(t2, t3, Address(d, 4 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ stp(t4, t5, Address(d, 6 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ stp(t6, t7, Address(__ pre(d, 8 * unit))); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } + + __ subs(count, count, 8); + __ br(Assembler::HS, again); + + // Drain + __ bind(drain); + if (UseSIMDForMemoryOps) { + __ stpq(v0, v1, Address(d, 4 * unit)); + __ stpq(v2, v3, Address(__ pre(d, 8 * unit))); + } else { + __ stp(t0, t1, Address(d, 2 * unit)); + __ stp(t2, t3, Address(d, 4 * unit)); + __ stp(t4, t5, Address(d, 6 * unit)); + __ stp(t6, t7, Address(__ pre(d, 8 * unit))); + } + + { + Label L1, L2; + __ tbz(count, exact_log2(4), L1); + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(__ pre(s, 4 * unit))); + __ stpq(v0, v1, Address(__ pre(d, 4 * unit))); + } else { + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(__ pre(s, 4 * unit))); + __ stp(t0, t1, Address(d, 2 * unit)); + __ stp(t2, t3, Address(__ pre(d, 4 * unit))); + } + __ bind(L1); + + if (direction == copy_forwards) { + __ add(s, s, bias); + __ add(d, d, bias); + } + + __ tbz(count, 1, L2); + __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards))); + __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards))); + __ bind(L2); + } + + __ ret(lr); + + if (AvoidUnalignedAccesses) { + Label drain, again; + // Register order for storing. Order is different for backward copy. 
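The aligned main loop above is a software pipeline: eight words are loaded up front, and each iteration stores the block loaded on the previous pass while fetching the next one, so loads and stores overlap. A scalar C++ sketch of the same schedule, modelling only the forward, non-SIMD path and ignoring prefetch:

#include <cstddef>
#include <cstdint>

// Precondition: count >= 8, matching the assert in the stub.
void copy_longs_pipelined(const uint64_t* s, uint64_t* d, ptrdiff_t count) {
  uint64_t t[8];
  for (int i = 0; i < 8; i++) t[i] = s[i];  // prime: fill the eight registers
  s += 8;
  count -= 16;                              // subs(count, count, 16)
  while (count >= 0) {                      // br(HS, again)
    for (int i = 0; i < 8; i++) {
      d[i] = t[i];                          // store the previous block ...
      t[i] = s[i];                          // ... while loading the next one
    }
    s += 8; d += 8;
    count -= 8;
  }
  for (int i = 0; i < 8; i++) d[i] = t[i];  // drain: flush the last loaded block
  // the code after the drain copies any remaining 4- and 2-word sub-blocks
}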
+ + __ bind(unaligned_copy_long); + + // source address is even aligned, target odd aligned + // + // when forward copying word pairs we read long pairs at offsets + // {0, 2, 4, 6} (in long words). when backwards copying we read + // long pairs at offsets {-2, -4, -6, -8}. We adjust the source + // address by -2 in the forwards case so we can compute the + // source offsets for both as {2, 4, 6, 8} * unit where unit = 1 + // or -1. + // + // when forward copying we need to store 1 word, 3 pairs and + // then 1 word at offsets {0, 1, 3, 5, 7}. Rather thna use a + // zero offset We adjust the destination by -1 which means we + // have to use offsets { 1, 2, 4, 6, 8} * unit for the stores. + // + // When backwards copyng we need to store 1 word, 3 pairs and + // then 1 word at offsets {-1, -3, -5, -7, -8} i.e. we use + // offsets {1, 3, 5, 7, 8} * unit. + + if (direction == copy_forwards) { + __ sub(s, s, 16); + __ sub(d, d, 8); + } + + // Fill 8 registers + // + // for forwards copy s was offset by -16 from the original input + // value of s so the register contents are at these offsets + // relative to the 64 bit block addressed by that original input + // and so on for each successive 64 byte block when s is updated + // + // t0 at offset 0, t1 at offset 8 + // t2 at offset 16, t3 at offset 24 + // t4 at offset 32, t5 at offset 40 + // t6 at offset 48, t7 at offset 56 + + // for backwards copy s was not offset so the register contents + // are at these offsets into the preceding 64 byte block + // relative to that original input and so on for each successive + // preceding 64 byte block when s is updated. this explains the + // slightly counter-intuitive looking pattern of register usage + // in the stp instructions for backwards copy. + // + // t0 at offset -16, t1 at offset -8 + // t2 at offset -32, t3 at offset -24 + // t4 at offset -48, t5 at offset -40 + // t6 at offset -64, t7 at offset -56 + + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + + __ subs(count, count, 16); + __ br(Assembler::LO, drain); + + int prefetch = PrefetchCopyIntervalInBytes; + bool use_stride = false; + if (direction == copy_backwards) { + use_stride = prefetch > 256; + prefetch = -prefetch; + if (use_stride) __ mov(stride, prefetch); + } + + __ bind(again); + + if (PrefetchCopyIntervalInBytes > 0) + __ prfm(use_stride ? 
Address(s, stride) : Address(s, prefetch), PLDL1KEEP); + + if (direction == copy_forwards) { + // allowing for the offset of -8 the store instructions place + // registers into the target 64 bit block at the following + // offsets + // + // t0 at offset 0 + // t1 at offset 8, t2 at offset 16 + // t3 at offset 24, t4 at offset 32 + // t5 at offset 40, t6 at offset 48 + // t7 at offset 56 + + __ str(t0, Address(d, 1 * unit)); + __ stp(t1, t2, Address(d, 2 * unit)); + __ ldp(t0, t1, Address(s, 2 * unit)); + __ stp(t3, t4, Address(d, 4 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ stp(t5, t6, Address(d, 6 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ str(t7, Address(__ pre(d, 8 * unit))); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } else { + // d was not offset when we started so the registers are + // written into the 64 bit block preceding d with the following + // offsets + // + // t1 at offset -8 + // t3 at offset -24, t0 at offset -16 + // t5 at offset -48, t2 at offset -32 + // t7 at offset -56, t4 at offset -48 + // t6 at offset -64 + // + // note that this matches the offsets previously noted for the + // loads + + __ str(t1, Address(d, 1 * unit)); + __ stp(t3, t0, Address(d, 3 * unit)); + __ ldp(t0, t1, Address(s, 2 * unit)); + __ stp(t5, t2, Address(d, 5 * unit)); + __ ldp(t2, t3, Address(s, 4 * unit)); + __ stp(t7, t4, Address(d, 7 * unit)); + __ ldp(t4, t5, Address(s, 6 * unit)); + __ str(t6, Address(__ pre(d, 8 * unit))); + __ ldp(t6, t7, Address(__ pre(s, 8 * unit))); + } + + __ subs(count, count, 8); + __ br(Assembler::HS, again); + + // Drain + // + // this uses the same pattern of offsets and register arguments + // as above + __ bind(drain); + if (direction == copy_forwards) { + __ str(t0, Address(d, 1 * unit)); + __ stp(t1, t2, Address(d, 2 * unit)); + __ stp(t3, t4, Address(d, 4 * unit)); + __ stp(t5, t6, Address(d, 6 * unit)); + __ str(t7, Address(__ pre(d, 8 * unit))); + } else { + __ str(t1, Address(d, 1 * unit)); + __ stp(t3, t0, Address(d, 3 * unit)); + __ stp(t5, t2, Address(d, 5 * unit)); + __ stp(t7, t4, Address(d, 7 * unit)); + __ str(t6, Address(__ pre(d, 8 * unit))); + } + // now we need to copy any remaining part block which may + // include a 4 word block subblock and/or a 2 word subblock. 
+ // bits 2 and 1 in the count are the tell-tale for whetehr we + // have each such subblock + { + Label L1, L2; + __ tbz(count, exact_log2(4), L1); + // this is the same as above but copying only 4 longs hence + // with ony one intervening stp between the str instructions + // but note that the offsets and registers still follow the + // same pattern + __ ldp(t0, t1, Address(s, 2 * unit)); + __ ldp(t2, t3, Address(__ pre(s, 4 * unit))); + if (direction == copy_forwards) { + __ str(t0, Address(d, 1 * unit)); + __ stp(t1, t2, Address(d, 2 * unit)); + __ str(t3, Address(__ pre(d, 4 * unit))); + } else { + __ str(t1, Address(d, 1 * unit)); + __ stp(t3, t0, Address(d, 3 * unit)); + __ str(t2, Address(__ pre(d, 4 * unit))); + } + __ bind(L1); + + __ tbz(count, 1, L2); + // this is the same as above but copying only 2 longs hence + // there is no intervening stp between the str instructions + // but note that the offset and register patterns are still + // the same + __ ldp(t0, t1, Address(__ pre(s, 2 * unit))); + if (direction == copy_forwards) { + __ str(t0, Address(d, 1 * unit)); + __ str(t1, Address(__ pre(d, 2 * unit))); + } else { + __ str(t1, Address(d, 1 * unit)); + __ str(t0, Address(__ pre(d, 2 * unit))); + } + __ bind(L2); + + // for forwards copy we need to re-adjust the offsets we + // applied so that s and d are follow the last words written + + if (direction == copy_forwards) { + __ add(s, s, 16); + __ add(d, d, 8); + } + + } + + __ ret(lr); + } + } + + // Small copy: less than 16 bytes. + // + // NB: Ignores all of the bits of count which represent more than 15 + // bytes, so a caller doesn't have to mask them. + + void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) { + bool is_backwards = step < 0; + size_t granularity = uabs(step); + int direction = is_backwards ? -1 : 1; + int unit = wordSize * direction; + + Label Lpair, Lword, Lint, Lshort, Lbyte; + + assert(granularity + && granularity <= sizeof (jlong), "Impossible granularity in copy_memory_small"); + + const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6; + + // ??? I don't know if this bit-test-and-branch is the right thing + // to do. It does a lot of jumping, resulting in several + // mispredicted branches. It might make more sense to do this + // with something like Duff's device with a single computed branch. + + __ tbz(count, 3 - exact_log2(granularity), Lword); + __ ldr(tmp, Address(__ adjust(s, unit, is_backwards))); + __ str(tmp, Address(__ adjust(d, unit, is_backwards))); + __ bind(Lword); + + if (granularity <= sizeof (jint)) { + __ tbz(count, 2 - exact_log2(granularity), Lint); + __ ldrw(tmp, Address(__ adjust(s, sizeof (jint) * direction, is_backwards))); + __ strw(tmp, Address(__ adjust(d, sizeof (jint) * direction, is_backwards))); + __ bind(Lint); + } + + if (granularity <= sizeof (jshort)) { + __ tbz(count, 1 - exact_log2(granularity), Lshort); + __ ldrh(tmp, Address(__ adjust(s, sizeof (jshort) * direction, is_backwards))); + __ strh(tmp, Address(__ adjust(d, sizeof (jshort) * direction, is_backwards))); + __ bind(Lshort); + } + + if (granularity <= sizeof (jbyte)) { + __ tbz(count, 0, Lbyte); + __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards))); + __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards))); + __ bind(Lbyte); + } + } + + Label copy_f, copy_b; + + // All-singing all-dancing memory copy. + // + // Copy count units of memory from s to d. 
The size of a unit is + // step, which can be positive or negative depending on the direction + // of copy. If is_aligned is false, we align the source address. + // + + void copy_memory(bool is_aligned, Register s, Register d, + Register count, Register tmp, int step) { + copy_direction direction = step < 0 ? copy_backwards : copy_forwards; + bool is_backwards = step < 0; + int granularity = uabs(step); + const Register t0 = r3, t1 = r4; + + // <= 96 bytes do inline. Direction doesn't matter because we always + // load all the data before writing anything + Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish; + const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8; + const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12; + const Register send = r17, dend = r18; + + if (PrefetchCopyIntervalInBytes > 0) + __ prfm(Address(s, 0), PLDL1KEEP); + __ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity); + __ br(Assembler::HI, copy_big); + + __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity)))); + __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity)))); + + __ cmp(count, 16/granularity); + __ br(Assembler::LS, copy16); + + __ cmp(count, 64/granularity); + __ br(Assembler::HI, copy80); + + __ cmp(count, 32/granularity); + __ br(Assembler::LS, copy32); + + // 33..64 bytes + if (UseSIMDForMemoryOps) { + __ ldpq(v0, v1, Address(s, 0)); + __ ldpq(v2, v3, Address(send, -32)); + __ stpq(v0, v1, Address(d, 0)); + __ stpq(v2, v3, Address(dend, -32)); + } else { + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(s, 16)); + __ ldp(t4, t5, Address(send, -32)); + __ ldp(t6, t7, Address(send, -16)); + + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(d, 16)); + __ stp(t4, t5, Address(dend, -32)); + __ stp(t6, t7, Address(dend, -16)); + } + __ b(finish); + + // 17..32 bytes + __ bind(copy32); + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(send, -16)); + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(dend, -16)); + __ b(finish); + + // 65..80/96 bytes + // (96 bytes if SIMD because we do 32 byes per instruction) + __ bind(copy80); + if (UseSIMDForMemoryOps) { + __ ld4(v0, v1, v2, v3, __ T16B, Address(s, 0)); + __ ldpq(v4, v5, Address(send, -32)); + __ st4(v0, v1, v2, v3, __ T16B, Address(d, 0)); + __ stpq(v4, v5, Address(dend, -32)); + } else { + __ ldp(t0, t1, Address(s, 0)); + __ ldp(t2, t3, Address(s, 16)); + __ ldp(t4, t5, Address(s, 32)); + __ ldp(t6, t7, Address(s, 48)); + __ ldp(t8, t9, Address(send, -16)); + + __ stp(t0, t1, Address(d, 0)); + __ stp(t2, t3, Address(d, 16)); + __ stp(t4, t5, Address(d, 32)); + __ stp(t6, t7, Address(d, 48)); + __ stp(t8, t9, Address(dend, -16)); + } + __ b(finish); + + // 0..16 bytes + __ bind(copy16); + __ cmp(count, 8/granularity); + __ br(Assembler::LO, copy8); + + // 8..16 bytes + __ ldr(t0, Address(s, 0)); + __ ldr(t1, Address(send, -8)); + __ str(t0, Address(d, 0)); + __ str(t1, Address(dend, -8)); + __ b(finish); + + if (granularity < 8) { + // 4..7 bytes + __ bind(copy8); + __ tbz(count, 2 - exact_log2(granularity), copy4); + __ ldrw(t0, Address(s, 0)); + __ ldrw(t1, Address(send, -4)); + __ strw(t0, Address(d, 0)); + __ strw(t1, Address(dend, -4)); + __ b(finish); + if (granularity < 4) { + // 0..3 bytes + __ bind(copy4); + __ cbz(count, finish); // get rid of 0 case + if (granularity == 2) { + __ ldrh(t0, Address(s, 0)); + __ strh(t0, Address(d, 0)); + } else { // granularity == 1 + // Now 1..3 bytes. Handle the 1 and 2 byte case by copying + // the first and last byte. 
+ // Handle the 3 byte case by loading and storing base + count/2 + // (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1)) + // This does means in the 1 byte case we load/store the same + // byte 3 times. + __ lsr(count, count, 1); + __ ldrb(t0, Address(s, 0)); + __ ldrb(t1, Address(send, -1)); + __ ldrb(t2, Address(s, count)); + __ strb(t0, Address(d, 0)); + __ strb(t1, Address(dend, -1)); + __ strb(t2, Address(d, count)); + } + __ b(finish); + } + } + + __ bind(copy_big); + if (is_backwards) { + __ lea(s, Address(s, count, Address::lsl(exact_log2(-step)))); + __ lea(d, Address(d, count, Address::lsl(exact_log2(-step)))); + } + + // Now we've got the small case out of the way we can align the + // source address on a 2-word boundary. + + Label aligned; + + if (is_aligned) { + // We may have to adjust by 1 word to get s 2-word-aligned. + __ tbz(s, exact_log2(wordSize), aligned); + __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards))); + __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards))); + __ sub(count, count, wordSize/granularity); + } else { + if (is_backwards) { + __ andr(rscratch2, s, 2 * wordSize - 1); + } else { + __ neg(rscratch2, s); + __ andr(rscratch2, rscratch2, 2 * wordSize - 1); + } + // rscratch2 is the byte adjustment needed to align s. + __ cbz(rscratch2, aligned); + int shift = exact_log2(granularity); + if (shift) __ lsr(rscratch2, rscratch2, shift); + __ sub(count, count, rscratch2); + +#if 0 + // ?? This code is only correct for a disjoint copy. It may or + // may not make sense to use it in that case. + + // Copy the first pair; s and d may not be aligned. + __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0)); + __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0)); + + // Align s and d, adjust count + if (is_backwards) { + __ sub(s, s, rscratch2); + __ sub(d, d, rscratch2); + } else { + __ add(s, s, rscratch2); + __ add(d, d, rscratch2); + } +#else + copy_memory_small(s, d, rscratch2, rscratch1, step); +#endif + } + + __ bind(aligned); + + // s is now 2-word-aligned. + + // We have a count of units and some trailing bytes. Adjust the + // count and do a bulk copy of words. + __ lsr(rscratch2, count, exact_log2(wordSize/granularity)); + if (direction == copy_forwards) + __ bl(copy_f); + else + __ bl(copy_b); + + // And the tail. + copy_memory_small(s, d, count, tmp, step); + + if (granularity >= 8) __ bind(copy8); + if (granularity >= 4) __ bind(copy4); + __ bind(finish); + } + + + void clobber_registers() { +#ifdef ASSERT + __ mov(rscratch1, (uint64_t)0xdeadbeef); + __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32); + for (Register r = r3; r <= r18; r++) + if (r != rscratch1) __ mov(r, rscratch1); +#endif + } + + // Scan over array at a for count oops, verifying each one. + // Preserves a and count, clobbers rscratch1 and rscratch2. 
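The 1..3 byte case described above sidesteps a branch on the exact count: the stub copies the first byte, the last byte and the byte at s + count/2, which covers counts of 1, 2 and 3 (for a single byte the same location is simply written three times). A plain C++ sketch of that trick, loading everything before storing just as the stub does:

#include <cstddef>
#include <cstdint>

void copy_1_to_3_bytes(const uint8_t* s, uint8_t* d, size_t count) {
  // Precondition: 1 <= count <= 3.
  uint8_t first = s[0];
  uint8_t last  = s[count - 1];
  uint8_t mid   = s[count / 2];
  d[0]         = first;
  d[count - 1] = last;
  d[count / 2] = mid;
}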
+ void verify_oop_array (size_t size, Register a, Register count, Register temp) { + Label loop, end; + __ mov(rscratch1, a); + __ mov(rscratch2, zr); + __ bind(loop); + __ cmp(rscratch2, count); + __ br(Assembler::HS, end); + if (size == (size_t)wordSize) { + __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size)))); + __ verify_oop(temp); + } else { + __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size)))); + __ decode_heap_oop(temp); // calls verify_oop + } + __ add(rscratch2, rscratch2, size); + __ b(loop); + __ bind(end); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry, + const char *name, bool dest_uninitialized = false) { + Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + __ enter(); + + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + + if (is_oop) { + __ push(RegSet::of(d, count), sp); + // no registers are destroyed by this call + gen_write_ref_array_pre_barrier(d, count, dest_uninitialized); + } + copy_memory(aligned, s, d, count, rscratch1, size); + if (is_oop) { + __ pop(RegSet::of(d, count), sp); + if (VerifyOops) + verify_oop_array(size, d, count, r16); + __ sub(count, count, 1); // make an inclusive end pointer + __ lea(count, Address(d, count, Address::lsl(exact_log2(size)))); + gen_write_ref_array_post_barrier(d, count, rscratch1); + } + __ leave(); + __ mov(r0, zr); // return 0 + __ ret(lr); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. 
+ //
+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+ address *entry, const char *name,
+ bool dest_uninitialized = false) {
+ Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ __ enter();
+
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+
+ // use fwd copy when (d-s) above_equal (count*size)
+ __ sub(rscratch1, d, s);
+ __ cmp(rscratch1, count, Assembler::LSL, exact_log2(size));
+ __ br(Assembler::HS, nooverlap_target);
+
+ if (is_oop) {
+ __ push(RegSet::of(d, count), sp);
+ // no registers are destroyed by this call
+ gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
+ }
+ copy_memory(aligned, s, d, count, rscratch1, -size);
+ if (is_oop) {
+ __ pop(RegSet::of(d, count), sp);
+ if (VerifyOops)
+ verify_oop_array(size, d, count, r16);
+ __ sub(count, count, 1); // make an inclusive end pointer
+ __ lea(count, Address(d, count, Address::lsl(exact_log2(size))));
+ gen_write_ref_array_post_barrier(d, count, rscratch1);
+ }
+ __ leave();
+ __ mov(r0, zr); // return 0
+ __ ret(lr);
+ return start;
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+ // we let the hardware handle it. The one to eight bytes within words,
+ // dwords or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_byte_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_byte_copy().
+ //
+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+ // we let the hardware handle it. The one to eight bytes within words,
+ // dwords or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ // + address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, + address* entry, const char *name) { + const bool not_oop = false; + return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_short_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_short_copy(). + // + address generate_disjoint_short_copy(bool aligned, + address* entry, const char *name) { + const bool not_oop = false; + return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, address nooverlap_target, + address *entry, const char *name) { + const bool not_oop = false; + return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); + + } + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_copy(bool aligned, address *entry, + const char *name, bool dest_uninitialized = false) { + const bool not_oop = false; + return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. 
+ // + address generate_conjoint_int_copy(bool aligned, address nooverlap_target, + address *entry, const char *name, + bool dest_uninitialized = false) { + const bool not_oop = false; + return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); + } + + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as size_t, can be zero + // + // Side Effects: + // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the + // no-overlap entry point used by generate_conjoint_long_oop_copy(). + // + address generate_disjoint_long_copy(bool aligned, address *entry, + const char *name, bool dest_uninitialized = false) { + const bool not_oop = false; + return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as size_t, can be zero + // + address generate_conjoint_long_copy(bool aligned, + address nooverlap_target, address *entry, + const char *name, bool dest_uninitialized = false) { + const bool not_oop = false; + return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as size_t, can be zero + // + // Side Effects: + // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the + // no-overlap entry point used by generate_conjoint_long_oop_copy(). + // + address generate_disjoint_oop_copy(bool aligned, address *entry, + const char *name, bool dest_uninitialized) { + const bool is_oop = true; + const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as size_t, can be zero + // + address generate_conjoint_oop_copy(bool aligned, + address nooverlap_target, address *entry, + const char *name, bool dest_uninitialized) { + const bool is_oop = true; + const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, + name, dest_uninitialized); + } + + + // Helper for generating a dynamic type check. + // Smashes rscratch1. 
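+ // Branches to L_success if sub_klass is a subtype of super_klass,
+ // using the super_check_offset supplied by the caller; falls
+ // through on failure.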
+ void generate_type_check(Register sub_klass, + Register super_check_offset, + Register super_klass, + Label& L_success) { + assert_different_registers(sub_klass, super_check_offset, super_klass); + + BLOCK_COMMENT("type_check:"); + + Label L_miss; + + __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, + super_check_offset); + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + + // Fall through on failure! + __ BIND(L_miss); + } + + // + // Generate checkcasting array copy stub + // + // Input: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // c_rarg3 - size_t ckoff (super_check_offset) + // c_rarg4 - oop ckval (super_klass) + // + // Output: + // r0 == 0 - success + // r0 == -1^K - failure, where K is partial transfer count + // + address generate_checkcast_copy(const char *name, address *entry, + bool dest_uninitialized = false) { + + Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + + // Input registers (after setup_arg_regs) + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register count = c_rarg2; // elementscount + const Register ckoff = c_rarg3; // super_check_offset + const Register ckval = c_rarg4; // super_klass + + // Registers used as temps (r18, r19, r20 are save-on-entry) + const Register count_save = r21; // orig elementscount + const Register start_to = r20; // destination array start address + const Register copied_oop = r18; // actual oop copied + const Register r19_klass = r19; // oop._klass + + //--------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the two arrays are subtypes of Object[] but the + // destination array type is not equal to or a supertype + // of the source type. Each element must be separately + // checked. + + assert_different_registers(from, to, count, ckoff, ckval, start_to, + copied_oop, r19_klass, count_save); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef ASSERT + // caller guarantees that the arrays really are different + // otherwise, we would have to make conjoint checks + { Label L; + array_overlap_test(L, TIMES_OOP); + __ stop("checkcast_copy within a single array"); + __ bind(L); + } +#endif //ASSERT + + // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + BLOCK_COMMENT("Entry:"); + } + + // Empty array: Nothing to do. + __ cbz(count, L_done); + + __ push(RegSet::of(r18, r19, r20, r21), sp); + +#ifdef ASSERT + BLOCK_COMMENT("assert consistent ckoff/ckval"); + // The ckoff and ckval must be mutually consistent, + // even though caller generates both. 
+ { Label L; + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ldrw(start_to, Address(ckval, sco_offset)); + __ cmpw(ckoff, start_to); + __ br(Assembler::EQ, L); + __ stop("super_check_offset inconsistent"); + __ bind(L); + } +#endif //ASSERT + + gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); + + // save the original count + __ mov(count_save, count); + + // Copy from low to high addresses + __ mov(start_to, to); // Save destination array start address + __ b(L_load_element); + + // ======== begin loop ======== + // (Loop is rotated; its entry is L_load_element.) + // Loop control: + // for (; count != 0; count--) { + // copied_oop = load_heap_oop(from++); + // ... generate_type_check ...; + // store_heap_oop(to++, copied_oop); + // } + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); + __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop); // store the oop + __ sub(count, count, 1); + __ cbz(count, L_do_card_marks); + + // ======== loop entry is here ======== + __ BIND(L_load_element); + __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8)); // load the oop + __ cbz(copied_oop, L_store_element); + + __ load_klass(r19_klass, copied_oop);// query the object klass + generate_type_check(r19_klass, ckoff, ckval, L_store_element); + // ======== end loop ======== + + // It was a real error; we must depend on the caller to finish the job. + // Register count = remaining oops, count_orig = total oops. + // Emit GC store barriers for the oops we have copied and report + // their number to the caller. + + __ subs(count, count_save, count); // K = partially copied oop count + __ eon(count, count, zr); // report (-1^K) to caller + __ br(Assembler::EQ, L_done_pop); + + __ BIND(L_do_card_marks); + __ add(to, to, -heapOopSize); // make an inclusive end pointer + gen_write_ref_array_post_barrier(start_to, to, rscratch1); + + __ bind(L_done_pop); + __ pop(RegSet::of(r18, r19, r20, r21), sp); + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + + __ bind(L_done); + __ mov(r0, count); + __ leave(); + __ ret(lr); + + return start; + } + + // Perform range checks on the proposed arraycopy. + // Kills temp, but nothing else. + // Also, clean the sign bits of src_pos and dst_pos. + void arraycopy_range_checks(Register src, // source array oop (c_rarg0) + Register src_pos, // source position (c_rarg1) + Register dst, // destination array oo (c_rarg2) + Register dst_pos, // destination position (c_rarg3) + Register length, + Register temp, + Label& L_failed) { + BLOCK_COMMENT("arraycopy_range_checks:"); + + assert_different_registers(rscratch1, temp); + + // if (src_pos + length > arrayOop(src)->length()) FAIL; + __ ldrw(rscratch1, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ addw(temp, length, src_pos); + __ cmpw(temp, rscratch1); + __ br(Assembler::HI, L_failed); + + // if (dst_pos + length > arrayOop(dst)->length()) FAIL; + __ ldrw(rscratch1, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ addw(temp, length, dst_pos); + __ cmpw(temp, rscratch1); + __ br(Assembler::HI, L_failed); + + // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. + __ movw(src_pos, src_pos); + __ movw(dst_pos, dst_pos); + + BLOCK_COMMENT("arraycopy_range_checks done"); + } + + // These stubs get called from some dumb test routine. + // I'll write them properly when they're called from + // something that's actually doing something. 
+ static void fake_arraycopy_stub(address src, address dst, int count) { + assert(count == 0, "huh?"); + } + + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. + // + // Arguments for generated stub: + // to: c_rarg0 + // value: c_rarg1 + // count: c_rarg2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + + const Register to = c_rarg0; // source array address + const Register value = c_rarg1; // value + const Register count = c_rarg2; // elements count + + const Register bz_base = r10; // base for block_zero routine + const Register cnt_words = r11; // temp register + + __ enter(); + + Label L_fill_elements, L_exit1; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bfi(value, value, 8, 8); // 8 bit -> 16 bit + __ bfi(value, value, 16, 16); // 16 bit -> 32 bit + __ br(Assembler::LO, L_fill_elements); + break; + case T_SHORT: + shift = 1; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ bfi(value, value, 16, 16); // 16 bit -> 32 bit + __ br(Assembler::LO, L_fill_elements); + break; + case T_INT: + shift = 2; + __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element + __ br(Assembler::LO, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + // Align source address at 8 bytes address boundary. + Label L_skip_align1, L_skip_align2, L_skip_align4; + if (!aligned) { + switch (t) { + case T_BYTE: + // One byte misalignment happens only for byte arrays. + __ tbz(to, 0, L_skip_align1); + __ strb(value, Address(__ post(to, 1))); + __ subw(count, count, 1); + __ bind(L_skip_align1); + // Fallthrough + case T_SHORT: + // Two bytes misalignment happens only for byte and short (char) arrays. + __ tbz(to, 1, L_skip_align2); + __ strh(value, Address(__ post(to, 2))); + __ subw(count, count, 2 >> shift); + __ bind(L_skip_align2); + // Fallthrough + case T_INT: + // Align to 8 bytes, we know we are 4 byte aligned to start. + __ tbz(to, 2, L_skip_align4); + __ strw(value, Address(__ post(to, 4))); + __ subw(count, count, 4 >> shift); + __ bind(L_skip_align4); + break; + default: ShouldNotReachHere(); + } + } + + // + // Fill large chunks + // + __ lsrw(cnt_words, count, 3 - shift); // number of words + __ bfi(value, value, 32, 32); // 32 bit -> 64 bit + __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift); + if (UseBlockZeroing) { + Label non_block_zeroing, rest; + // count >= BlockZeroingLowLimit && value == 0 + __ subs(rscratch1, cnt_words, BlockZeroingLowLimit >> 3); + __ ccmp(value, 0 /* comparing value */, 0 /* NZCV */, Assembler::GE); + __ br(Assembler::NE, non_block_zeroing); + __ mov(bz_base, to); + __ block_zero(bz_base, cnt_words, true); + __ mov(to, bz_base); + __ b(rest); + __ bind(non_block_zeroing); + __ fill_words(to, cnt_words, value); + __ bind(rest); + } + else { + __ fill_words(to, cnt_words, value); + } + + // Remaining count is less than 8 bytes. Fill it by a single store. + // Note that the total length is no less than 8 bytes. 
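+ // 'value' already holds the fill pattern replicated to 64 bits, so a
+ // single unaligned 8-byte store ending at the last element covers the
+ // tail; it may rewrite bytes the bulk loop has already written, which
+ // is harmless because the pattern repeats.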
+ if (t == T_BYTE || t == T_SHORT) { + Label L_exit1; + __ cbzw(count, L_exit1); + __ add(to, to, count, Assembler::LSL, shift); // points to the end + __ str(value, Address(to, -8)); // overwrite some elements + __ bind(L_exit1); + __ leave(); + __ ret(lr); + } + + // Handle copies less than 8 bytes. + Label L_fill_2, L_fill_4, L_exit2; + __ bind(L_fill_elements); + switch (t) { + case T_BYTE: + __ tbz(count, 0, L_fill_2); + __ strb(value, Address(__ post(to, 1))); + __ bind(L_fill_2); + __ tbz(count, 1, L_fill_4); + __ strh(value, Address(__ post(to, 2))); + __ bind(L_fill_4); + __ tbz(count, 2, L_exit2); + __ strw(value, Address(to)); + break; + case T_SHORT: + __ tbz(count, 0, L_fill_4); + __ strh(value, Address(__ post(to, 2))); + __ bind(L_fill_4); + __ tbz(count, 1, L_exit2); + __ strw(value, Address(to)); + break; + case T_INT: + __ cbzw(count, L_exit2); + __ strw(value, Address(to)); + break; + default: ShouldNotReachHere(); + } + __ bind(L_exit2); + __ leave(); + __ ret(lr); + return start; + } + + // + // Generate 'unsafe' array copy stub + // Though just as safe as the other stubs, it takes an unscaled + // size_t argument instead of an element count. + // + // Input: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - byte count, treated as ssize_t, can be zero + // + // Examines the alignment of the operands and dispatches + // to a long, int, short, or byte copy loop. + // + address generate_unsafe_copy(const char *name, + address byte_copy_entry, + address short_copy_entry, + address int_copy_entry, + address long_copy_entry) { + Label L_long_aligned, L_int_aligned, L_short_aligned; + Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + + __ orr(rscratch1, s, d); + __ orr(rscratch1, rscratch1, count); + + __ andr(rscratch1, rscratch1, BytesPerLong-1); + __ cbz(rscratch1, L_long_aligned); + __ andr(rscratch1, rscratch1, BytesPerInt-1); + __ cbz(rscratch1, L_int_aligned); + __ tbz(rscratch1, 0, L_short_aligned); + __ b(RuntimeAddress(byte_copy_entry)); + + __ BIND(L_short_aligned); + __ lsr(count, count, LogBytesPerShort); // size => short_count + __ b(RuntimeAddress(short_copy_entry)); + __ BIND(L_int_aligned); + __ lsr(count, count, LogBytesPerInt); // size => int_count + __ b(RuntimeAddress(int_copy_entry)); + __ BIND(L_long_aligned); + __ lsr(count, count, LogBytesPerLong); // size => long_count + __ b(RuntimeAddress(long_copy_entry)); + + return start; + } + + // + // Generate generic array copy stubs + // + // Input: + // c_rarg0 - src oop + // c_rarg1 - src_pos (32-bits) + // c_rarg2 - dst oop + // c_rarg3 - dst_pos (32-bits) + // c_rarg4 - element count (32-bits) + // + // Output: + // r0 == 0 - success + // r0 == -1^K - failure, where K is partial transfer count + // + address generate_generic_copy(const char *name, + address byte_copy_entry, address short_copy_entry, + address int_copy_entry, address oop_copy_entry, + address long_copy_entry, address checkcast_copy_entry) { + + Label L_failed, L_failed_0, L_objArray; + Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + + // Input registers + const Register src = c_rarg0; // source array oop + const Register src_pos = c_rarg1; // source position + const Register dst = c_rarg2; // destination 
array oop + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + // bump this on entry, not on exit: + inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + + //----------------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the following conditions are met: + // + // (1) src and dst must not be null. + // (2) src_pos must not be negative. + // (3) dst_pos must not be negative. + // (4) length must not be negative. + // (5) src klass and dst klass should be the same and not NULL. + // (6) src and dst should be arrays. + // (7) src_pos + length must not exceed length of src. + // (8) dst_pos + length must not exceed length of dst. + // + + // if (src == NULL) return -1; + __ cbz(src, L_failed); + + // if (src_pos < 0) return -1; + __ tbnz(src_pos, 31, L_failed); // i.e. sign bit set + + // if (dst == NULL) return -1; + __ cbz(dst, L_failed); + + // if (dst_pos < 0) return -1; + __ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set + + // registers used as temp + const Register scratch_length = r16; // elements count to copy + const Register scratch_src_klass = r17; // array klass + const Register lh = r18; // layout helper + + // if (length < 0) return -1; + __ movw(scratch_length, length); // length (elements count, 32-bits value) + __ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set + + __ load_klass(scratch_src_klass, src); +#ifdef ASSERT + // assert(src->klass() != NULL); + { + BLOCK_COMMENT("assert klasses not null {"); + Label L1, L2; + __ cbnz(scratch_src_klass, L2); // it is broken if klass is NULL + __ bind(L1); + __ stop("broken null klass"); + __ bind(L2); + __ load_klass(rscratch1, dst); + __ cbz(rscratch1, L1); // this would be broken also + BLOCK_COMMENT("} assert klasses not null done"); + } +#endif + + // Load layout helper (32-bits) + // + // |array_tag| | header_size | element_type | |log2_element_size| + // 32 30 24 16 8 2 0 + // + // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 + // + + const int lh_offset = in_bytes(Klass::layout_helper_offset()); + + // Handle objArrays completely differently... + const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ldrw(lh, Address(scratch_src_klass, lh_offset)); + __ movw(rscratch1, objArray_lh); + __ eorw(rscratch2, lh, rscratch1); + __ cbzw(rscratch2, L_objArray); + + // if (src->klass() != dst->klass()) return -1; + __ load_klass(rscratch2, dst); + __ eor(rscratch2, rscratch2, scratch_src_klass); + __ cbnz(rscratch2, L_failed); + + // if (!src->is_Array()) return -1; + __ tbz(lh, 31, L_failed); // i.e. (lh >= 0) + + // At this point, it is known to be a typeArray (array_tag 0x3). 
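+ // (Array layout helpers are negative because the array tag occupies
+ // the top bits, so the tbz on bit 31 above has already sent any
+ // non-array klass to L_failed.)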
+#ifdef ASSERT + { + BLOCK_COMMENT("assert primitive array {"); + Label L; + __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); + __ cmpw(lh, rscratch2); + __ br(Assembler::GE, L); + __ stop("must be a primitive array"); + __ bind(L); + BLOCK_COMMENT("} assert primitive array done"); + } +#endif + + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + rscratch2, L_failed); + + // TypeArrayKlass + // + // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); + // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); + // + + const Register rscratch1_offset = rscratch1; // array offset + const Register r18_elsize = lh; // element size + + __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask+1)); // array_offset + __ add(src, src, rscratch1_offset); // src array offset + __ add(dst, dst, rscratch1_offset); // dst array offset + BLOCK_COMMENT("choose copy loop based on element size"); + + // next registers should be set before the jump to corresponding stub + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register count = c_rarg2; // elements count + + // 'from', 'to', 'count' registers should be set in such order + // since they are the same as 'src', 'src_pos', 'dst'. + + assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + + // The possible values of elsize are 0-3, i.e. exact_log2(element + // size in bytes). We do a simple bitwise binary search. + __ BIND(L_copy_bytes); + __ tbnz(r18_elsize, 1, L_copy_ints); + __ tbnz(r18_elsize, 0, L_copy_shorts); + __ lea(from, Address(src, src_pos));// src_addr + __ lea(to, Address(dst, dst_pos));// dst_addr + __ movw(count, scratch_length); // length + __ b(RuntimeAddress(byte_copy_entry)); + + __ BIND(L_copy_shorts); + __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr + __ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr + __ movw(count, scratch_length); // length + __ b(RuntimeAddress(short_copy_entry)); + + __ BIND(L_copy_ints); + __ tbnz(r18_elsize, 0, L_copy_longs); + __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr + __ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr + __ movw(count, scratch_length); // length + __ b(RuntimeAddress(int_copy_entry)); + + __ BIND(L_copy_longs); +#ifdef ASSERT + { + BLOCK_COMMENT("assert long copy {"); + Label L; + __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize + __ cmpw(r18_elsize, LogBytesPerLong); + __ br(Assembler::EQ, L); + __ stop("must be long copy, but elsize is wrong"); + __ bind(L); + BLOCK_COMMENT("} assert long copy done"); + } +#endif + __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr + __ lea(to, Address(dst, dst_pos, Address::lsl(3)));// dst_addr + __ movw(count, scratch_length); // length + __ b(RuntimeAddress(long_copy_entry)); + + // ObjArrayKlass + __ BIND(L_objArray); + // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + + Label L_plain_copy, L_checkcast_copy; + // test array classes for subtyping + __ load_klass(r18, dst); + __ cmp(scratch_src_klass, r18); // usual case is exact equality + __ br(Assembler::NE, L_checkcast_copy); + + // Identically typed arrays can be copied without element-wise checks. 
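+ // Range-check both arrays, form the raw from/to element addresses and
+ // the 32-bit element count, then branch to the plain oop copy stub.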
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + rscratch2, L_failed); + + __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop))); + __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop))); + __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ movw(count, scratch_length); // length + __ BIND(L_plain_copy); + __ b(RuntimeAddress(oop_copy_entry)); + + __ BIND(L_checkcast_copy); + // live at this point: scratch_src_klass, scratch_length, r18 (dst_klass) + { + // Before looking at dst.length, make sure dst is also an objArray. + __ ldrw(rscratch1, Address(r18, lh_offset)); + __ movw(rscratch2, objArray_lh); + __ eorw(rscratch1, rscratch1, rscratch2); + __ cbnzw(rscratch1, L_failed); + + // It is safe to examine both src.length and dst.length. + arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, + r18, L_failed); + + const Register rscratch2_dst_klass = rscratch2; + __ load_klass(rscratch2_dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. + __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop))); + __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop))); + __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ movw(count, length); // length (reloaded) + Register sco_temp = c_rarg3; // this register is free now + assert_different_registers(from, to, count, sco_temp, + rscratch2_dst_klass, scratch_src_klass); + // assert_clean_int(count, sco_temp); + + // Generate the type check. + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset)); + // assert_clean_int(sco_temp, r18); + generate_type_check(scratch_src_klass, sco_temp, rscratch2_dst_klass, L_plain_copy); + + // Fetch destination element klass from the ObjArrayKlass header. + int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); + __ ldr(rscratch2_dst_klass, Address(rscratch2_dst_klass, ek_offset)); + __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset)); + + // the checkcast_copy loop needs two extra arguments: + assert(c_rarg3 == sco_temp, "#3 already in place"); + // Set up arguments for checkcast_copy_entry. 
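+ // c_rarg3 (sco_temp) already holds the element klass's
+ // super_check_offset; c_rarg4 receives the element klass itself,
+ // loaded just above.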
+ __ mov(c_rarg4, rscratch2_dst_klass); // dst.klass.element_klass + __ b(RuntimeAddress(checkcast_copy_entry)); + } + + __ BIND(L_failed); + __ mov(r0, -1); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + void generate_arraycopy_stubs() { + address entry; + address entry_jbyte_arraycopy; + address entry_jshort_arraycopy; + address entry_jint_arraycopy; + address entry_oop_arraycopy; + address entry_jlong_arraycopy; + address entry_checkcast_arraycopy; + + generate_copy_longs(copy_f, r0, r1, rscratch2, copy_forwards); + generate_copy_longs(copy_b, r0, r1, rscratch2, copy_backwards); + + StubRoutines::aarch64::_zero_longs = generate_zero_longs(r10, r11); + + //*** jbyte + // Always need aligned and unaligned versions + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, + "jbyte_disjoint_arraycopy"); + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, + &entry_jbyte_arraycopy, + "jbyte_arraycopy"); + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, + "arrayof_jbyte_disjoint_arraycopy"); + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, + "arrayof_jbyte_arraycopy"); + + //*** jshort + // Always need aligned and unaligned versions + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, + "jshort_disjoint_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, + &entry_jshort_arraycopy, + "jshort_arraycopy"); + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, + "arrayof_jshort_disjoint_arraycopy"); + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, + "arrayof_jshort_arraycopy"); + + //*** jint + // Aligned versions + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, + "arrayof_jint_disjoint_arraycopy"); + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, + "arrayof_jint_arraycopy"); + // In 64 bit we need both aligned and unaligned versions of jint arraycopy. + // entry_jint_arraycopy always points to the unaligned version + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, + "jint_disjoint_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, + &entry_jint_arraycopy, + "jint_arraycopy"); + + //*** jlong + // It is always aligned + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, + "arrayof_jlong_disjoint_arraycopy"); + StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, + "arrayof_jlong_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; + StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; + + //*** oops + { + // With compressed oops we need unaligned versions; notice that + // we overwrite entry_oop_arraycopy. 
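+ // (generate_*_oop_copy above uses an element size of jint when
+ // UseCompressedOops is on and jlong otherwise, so 8-byte alignment
+ // can only be assumed in the uncompressed case)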
+ bool aligned = !UseCompressedOops; + + StubRoutines::_arrayof_oop_disjoint_arraycopy + = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", + /*dest_uninitialized*/false); + StubRoutines::_arrayof_oop_arraycopy + = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", + /*dest_uninitialized*/false); + // Aligned versions without pre-barriers + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit + = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", + /*dest_uninitialized*/true); + StubRoutines::_arrayof_oop_arraycopy_uninit + = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", + /*dest_uninitialized*/true); + } + + StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; + StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; + StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; + StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; + + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); + StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, + /*dest_uninitialized*/true); + + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_jlong_arraycopy); + + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", + entry_jbyte_arraycopy, + entry_jshort_arraycopy, + entry_jint_arraycopy, + entry_oop_arraycopy, + entry_jlong_arraycopy, + entry_checkcast_arraycopy); + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + Label L_doLast; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + 
__ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 44); + __ br(Assembler::EQ, L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 52); + __ br(Assembler::EQ, L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ BIND(L_doLast); + + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + + __ ld1(v1, __ T16B, key); + __ rev32(v1, __ T16B, v1); + __ eor(v0, __ T16B, v0, v1); + + __ st1(v0, __ T16B, to); + + __ mov(r0, 0); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rscratch1; + + address start = __ pc(); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v5, __ T16B, __ post(key, 16)); + __ rev32(v5, __ T16B, v5); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 44); + __ br(Assembler::EQ, L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ cmpw(keylen, 52); + __ br(Assembler::EQ, L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, __ post(key, 32)); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + + __ BIND(L_doLast); + + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + + __ eor(v0, __ T16B, v0, v5); + + __ st1(v0, __ T16B, to); + + __ mov(r0, 0); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte 
array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // x0 - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) + const Register keylen = rscratch1; + + address start = __ pc(); + + __ enter(); + + __ subsw(rscratch2, len_reg, zr); + __ br(Assembler::LE, _L_finish); + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v0, __ T16B, rvec); + + __ cmpw(keylen, 52); + __ br(Assembler::CC, L_loadkeys_44); + __ br(Assembler::EQ, L_loadkeys_52); + + __ ld1(v17, v18, __ T16B, __ post(key, 32)); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ BIND(L_loadkeys_52); + __ ld1(v19, v20, __ T16B, __ post(key, 32)); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); + __ BIND(L_loadkeys_44); + __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64)); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64)); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, v31, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); + __ rev32(v31, __ T16B, v31); + + __ BIND(L_aes_loop); + __ ld1(v1, __ T16B, __ post(from, 16)); + __ eor(v0, __ T16B, v0, v1); + + __ br(Assembler::CC, L_rounds_44); + __ br(Assembler::EQ, L_rounds_52); + + __ aese(v0, v17); __ aesmc(v0, v0); + __ aese(v0, v18); __ aesmc(v0, v0); + __ BIND(L_rounds_52); + __ aese(v0, v19); __ aesmc(v0, v0); + __ aese(v0, v20); __ aesmc(v0, v0); + __ BIND(L_rounds_44); + __ aese(v0, v21); __ aesmc(v0, v0); + __ aese(v0, v22); __ aesmc(v0, v0); + __ aese(v0, v23); __ aesmc(v0, v0); + __ aese(v0, v24); __ aesmc(v0, v0); + __ aese(v0, v25); __ aesmc(v0, v0); + __ aese(v0, v26); __ aesmc(v0, v0); + __ aese(v0, v27); __ aesmc(v0, v0); + __ aese(v0, v28); __ aesmc(v0, v0); + __ aese(v0, v29); __ aesmc(v0, v0); + __ aese(v0, v30); + __ eor(v0, __ T16B, v0, v31); + + __ st1(v0, __ T16B, __ post(to, 16)); + + __ subw(len_reg, len_reg, 16); + __ cbnzw(len_reg, L_aes_loop); + + __ st1(v0, __ T16B, rvec); + + __ BIND(_L_finish); + __ mov(r0, rscratch2); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + // Output: + // r0 - input length + // + address generate_cipherBlockChaining_decryptAESCrypt() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); 
+ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish; + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) + const Register keylen = rscratch1; + + address start = __ pc(); + + __ enter(); + + __ subsw(rscratch2, len_reg, zr); + __ br(Assembler::LE, _L_finish); + + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + + __ ld1(v2, __ T16B, rvec); + + __ ld1(v31, __ T16B, __ post(key, 16)); + __ rev32(v31, __ T16B, v31); + + __ cmpw(keylen, 52); + __ br(Assembler::CC, L_loadkeys_44); + __ br(Assembler::EQ, L_loadkeys_52); + + __ ld1(v17, v18, __ T16B, __ post(key, 32)); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ BIND(L_loadkeys_52); + __ ld1(v19, v20, __ T16B, __ post(key, 32)); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); + __ BIND(L_loadkeys_44); + __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64)); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64)); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); + + __ BIND(L_aes_loop); + __ ld1(v0, __ T16B, __ post(from, 16)); + __ orr(v1, __ T16B, v0, v0); + + __ br(Assembler::CC, L_rounds_44); + __ br(Assembler::EQ, L_rounds_52); + + __ aesd(v0, v17); __ aesimc(v0, v0); + __ aesd(v0, v18); __ aesimc(v0, v0); + __ BIND(L_rounds_52); + __ aesd(v0, v19); __ aesimc(v0, v0); + __ aesd(v0, v20); __ aesimc(v0, v0); + __ BIND(L_rounds_44); + __ aesd(v0, v21); __ aesimc(v0, v0); + __ aesd(v0, v22); __ aesimc(v0, v0); + __ aesd(v0, v23); __ aesimc(v0, v0); + __ aesd(v0, v24); __ aesimc(v0, v0); + __ aesd(v0, v25); __ aesimc(v0, v0); + __ aesd(v0, v26); __ aesimc(v0, v0); + __ aesd(v0, v27); __ aesimc(v0, v0); + __ aesd(v0, v28); __ aesimc(v0, v0); + __ aesd(v0, v29); __ aesimc(v0, v0); + __ aesd(v0, v30); + __ eor(v0, __ T16B, v0, v31); + __ eor(v0, __ T16B, v0, v2); + + __ st1(v0, __ T16B, __ post(to, 16)); + __ orr(v2, __ T16B, v1, v1); + + __ subw(len_reg, len_reg, 16); + __ cbnzw(len_reg, L_aes_loop); + + __ st1(v2, __ T16B, rvec); + + __ BIND(_L_finish); + __ mov(r0, rscratch2); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - byte[] source+offset + // c_rarg1 - int[] SHA.state + // c_rarg2 - int offset + // c_rarg3 - int limit + // + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + Label keys; + Label sha1_loop; + + // load the keys into v0..v3 + __ adr(rscratch1, keys); + __ ld4r(v0, v1, v2, v3, __ T4S, Address(rscratch1)); + // load 5 words state into v6, v7 + __ ldrq(v6, Address(state, 0)); + __ 
ldrs(v7, Address(state, 16)); + + + __ BIND(sha1_loop); + // load 64 bytes of data into v16..v19 + __ ld1(v16, v17, v18, v19, __ T4S, multi_block ? __ post(buf, 64) : buf); + __ rev32(v16, __ T16B, v16); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); + __ rev32(v19, __ T16B, v19); + + // do the sha1 + __ addv(v4, __ T4S, v16, v0); + __ orr(v20, __ T16B, v6, v6); + + FloatRegister d0 = v16; + FloatRegister d1 = v17; + FloatRegister d2 = v18; + FloatRegister d3 = v19; + + for (int round = 0; round < 20; round++) { + FloatRegister tmp1 = (round & 1) ? v4 : v5; + FloatRegister tmp2 = (round & 1) ? v21 : v22; + FloatRegister tmp3 = round ? ((round & 1) ? v22 : v21) : v7; + FloatRegister tmp4 = (round & 1) ? v5 : v4; + FloatRegister key = (round < 4) ? v0 : ((round < 9) ? v1 : ((round < 14) ? v2 : v3)); + + if (round < 16) __ sha1su0(d0, __ T4S, d1, d2); + if (round < 19) __ addv(tmp1, __ T4S, d1, key); + __ sha1h(tmp2, __ T4S, v20); + if (round < 5) + __ sha1c(v20, __ T4S, tmp3, tmp4); + else if (round < 10 || round >= 15) + __ sha1p(v20, __ T4S, tmp3, tmp4); + else + __ sha1m(v20, __ T4S, tmp3, tmp4); + if (round < 16) __ sha1su1(d0, __ T4S, d3); + + tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1; + } + + __ addv(v7, __ T2S, v7, v21); + __ addv(v6, __ T4S, v6, v20); + + if (multi_block) { + __ add(ofs, ofs, 64); + __ cmp(ofs, limit); + __ br(Assembler::LE, sha1_loop); + __ mov(c_rarg0, ofs); // return ofs + } + + __ strq(v6, Address(state, 0)); + __ strs(v7, Address(state, 16)); + + __ ret(lr); + + __ bind(keys); + __ emit_int32(0x5a827999); + __ emit_int32(0x6ed9eba1); + __ emit_int32(0x8f1bbcdc); + __ emit_int32(0xca62c1d6); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - byte[] source+offset + // c_rarg1 - int[] SHA.state + // c_rarg2 - int offset + // c_rarg3 - int limit + // + address generate_sha256_implCompress(bool multi_block, const char *name) { + static const uint32_t round_consts[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + Label sha1_loop; + + __ stpd(v8, v9, __ pre(sp, -32)); + __ stpd(v10, v11, Address(sp, 16)); + +// dga == v0 +// dgb == v1 +// dg0 == v2 +// dg1 == v3 +// dg2 == v4 +// t0 == v6 +// t1 == v7 + + // load 16 keys to v16..v31 + __ lea(rscratch1, ExternalAddress((address)round_consts)); + __ ld1(v16, v17, v18, v19, __ T4S, __ post(rscratch1, 64)); + __ ld1(v20, v21, v22, v23, __ T4S, __ post(rscratch1, 64)); + __ ld1(v24, v25, v26, v27, __ T4S, __ post(rscratch1, 64)); + __ ld1(v28, v29, v30, v31, __ T4S, rscratch1); + + // load 8 words (256 bits) state 
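+ // (the eight 32-bit state words are held as two 128-bit vectors, v0 and v1)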
+ __ ldpq(v0, v1, state); + + __ BIND(sha1_loop); + // load 64 bytes of data into v8..v11 + __ ld1(v8, v9, v10, v11, __ T4S, multi_block ? __ post(buf, 64) : buf); + __ rev32(v8, __ T16B, v8); + __ rev32(v9, __ T16B, v9); + __ rev32(v10, __ T16B, v10); + __ rev32(v11, __ T16B, v11); + + __ addv(v6, __ T4S, v8, v16); + __ orr(v2, __ T16B, v0, v0); + __ orr(v3, __ T16B, v1, v1); + + FloatRegister d0 = v8; + FloatRegister d1 = v9; + FloatRegister d2 = v10; + FloatRegister d3 = v11; + + + for (int round = 0; round < 16; round++) { + FloatRegister tmp1 = (round & 1) ? v6 : v7; + FloatRegister tmp2 = (round & 1) ? v7 : v6; + FloatRegister tmp3 = (round & 1) ? v2 : v4; + FloatRegister tmp4 = (round & 1) ? v4 : v2; + + if (round < 12) __ sha256su0(d0, __ T4S, d1); + __ orr(v4, __ T16B, v2, v2); + if (round < 15) + __ addv(tmp1, __ T4S, d1, as_FloatRegister(round + 17)); + __ sha256h(v2, __ T4S, v3, tmp2); + __ sha256h2(v3, __ T4S, v4, tmp2); + if (round < 12) __ sha256su1(d0, __ T4S, d2, d3); + + tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1; + } + + __ addv(v0, __ T4S, v0, v2); + __ addv(v1, __ T4S, v1, v3); + + if (multi_block) { + __ add(ofs, ofs, 64); + __ cmp(ofs, limit); + __ br(Assembler::LE, sha1_loop); + __ mov(c_rarg0, ofs); // return ofs + } + + __ ldpd(v10, v11, Address(sp, 16)); + __ ldpd(v8, v9, __ post(sp, 32)); + + __ stpq(v0, v1, state); + + __ ret(lr); + + return start; + } + + // Safefetch stubs. + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // c_rarg0 = adr + // c_rarg1 = errValue + // + // result: + // PPC_RET = *adr or errValue + + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into c_rarg1, may fault. 
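+ // (the signal handler recognizes this PC and resumes at the
+ // continuation point recorded below, so a fault returns errValue)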
+ *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ ldrw(c_rarg1, Address(c_rarg0, 0)); + break; + case 8: + // int64_t + __ ldr(c_rarg1, Address(c_rarg0, 0)); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ mov(r0, c_rarg1); + __ ret(lr); + } + + /** + * Arguments: + * + * Inputs: + * c_rarg0 - int crc + * c_rarg1 - byte* buf + * c_rarg2 - int length + * + * Output: + * r0 - int crc result + * + * Preserves: + * r13 + * + */ + address generate_updateBytesCRC32() { + assert(UseCRC32Intrinsics, "what are we doing here?"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + + address start = __ pc(); + + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register table0 = c_rarg3; // crc_table address + const Register table1 = c_rarg4; + const Register table2 = c_rarg5; + const Register table3 = c_rarg6; + const Register tmp3 = c_rarg7; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, + table0, table1, table2, table3, rscratch1, rscratch2, tmp3); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y lenth + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + + address start = __ pc(); + const Register x = r0; + const Register xlen = r1; + const Register y = r2; + const Register ylen = r3; + const Register z = r4; + const Register zlen = r5; + + const Register tmp1 = r10; + const Register tmp2 = r11; + const Register tmp3 = r12; + const Register tmp4 = r13; + const Register tmp5 = r14; + const Register tmp6 = r15; + const Register tmp7 = r16; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. 
+ +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, + address runtime_entry, + Register arg1 = noreg, + Register arg2 = noreg) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. + // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0 + enum layout { + rfp_off = 0, + rfp_off2, + return_off, + return_off2, + framesize // inclusive of return address + }; + + int insts_size = 512; + int locs_size = 64; + + CodeBuffer code(name, insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM + + __ enter(); // Save FP and LR before call + + assert(is_even(framesize/2), "sp not 16-byte aligned"); + + // lr and fp are already in place + __ sub(sp, rfp, ((unsigned)framesize-4) << LogBytesPerInt); // prolog + + int frame_complete = __ pc() - start; + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1); + + // Call runtime + if (arg1 != noreg) { + assert(arg2 != c_rarg1, "clobbered"); + __ mov(c_rarg1, arg1); + } + if (arg2 != noreg) { + __ mov(c_rarg2, arg2); + } + __ mov(c_rarg0, rthread); + BLOCK_COMMENT("call runtime_entry"); + __ mov(rscratch1, runtime_entry); + __ blr(rscratch1); + + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(true); + __ maybe_isb(); + + __ leave(); + + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, L); + __ should_not_reach_here(); + __ bind(L); +#endif // ASSERT + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + + // codeBlob framesize is in words (not VMRegImpl::slot_size) + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + (framesize >> (LogBytesPerWord - LogBytesPerInt)), + oop_maps, false); + return stub->entry_point(); + } + + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, + Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; + + RegSet _toSave; + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = c_rarg0; + Pa_base = reg; // Argument registers + if (squaring) + Pb_base = Pa_base; + else + Pb_base = ++reg; + Pn_base = ++reg; + Rlen= ++reg; + inv = ++reg; + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. + Pb = ++reg; + Pm = ++reg; + Pn = ++reg; + + t0 = ++reg; // Three registers which form a + t1 = ++reg; // triple-precision accumuator. + t2 = ++reg; + + Ri = ++reg; // Inner and outer loop indexes. 
+      Rj = ++reg;
+
+      Rhi_ab = ++reg;   // Product registers: low and high parts
+      Rlo_ab = ++reg;   // of a*b and m*n.
+      Rhi_mn = ++reg;
+      Rlo_mn = ++reg;
+
+      // r19 and up are callee-saved.
+      _toSave = RegSet::range(r19, reg) + Pm_base;
+    }
+
+  private:
+    void save_regs() {
+      push(_toSave, sp);
+    }
+
+    void restore_regs() {
+      pop(_toSave, sp);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block) {
+      Label loop, end, odd;
+      tbnz(count, 0, odd);
+      cbz(count, end);
+      align(16);
+      bind(loop);
+      (this->*block)();
+      bind(odd);
+      (this->*block)();
+      subs(count, count, 2);
+      br(Assembler::GT, loop);
+      bind(end);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
+      Label loop, end, odd;
+      tbnz(count, 0, odd);
+      cbz(count, end);
+      align(16);
+      bind(loop);
+      (this->*block)(d, s, tmp);
+      bind(odd);
+      (this->*block)(d, s, tmp);
+      subs(count, count, 2);
+      br(Assembler::GT, loop);
+      bind(end);
+    }
+
+    void pre1(RegisterOrConstant i) {
+      block_comment("pre1");
+      // Pa = Pa_base;
+      // Pb = Pb_base + i;
+      // Pm = Pm_base;
+      // Pn = Pn_base + i;
+      // Ra = *Pa;
+      // Rb = *Pb;
+      // Rm = *Pm;
+      // Rn = *Pn;
+      ldr(Ra, Address(Pa_base));
+      ldr(Rb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+      ldr(Rm, Address(Pm_base));
+      ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+      lea(Pa, Address(Pa_base));
+      lea(Pb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+      lea(Pm, Address(Pm_base));
+      lea(Pn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+
+      // Zero the m*n result.
+      mov(Rhi_mn, zr);
+      mov(Rlo_mn, zr);
+    }
+
+    // The core multiply-accumulate step of a Montgomery
+    // multiplication.  The idea is to schedule operations as a
+    // pipeline so that instructions with long latencies (loads and
+    // multiplies) have time to complete before their results are
+    // used.  This most benefits in-order implementations of the
+    // architecture but out-of-order ones also benefit.
+    void step() {
+      block_comment("step");
+      // MACC(Ra, Rb, t0, t1, t2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      umulh(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      ldr(Ra, pre(Pa, wordSize));
+      ldr(Rb, pre(Pb, -wordSize));
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n from the
+                                       // previous iteration.
+      // MACC(Rm, Rn, t0, t1, t2);
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      umulh(Rhi_mn, Rm, Rn);
+      mul(Rlo_mn, Rm, Rn);
+      ldr(Rm, pre(Pm, wordSize));
+      ldr(Rn, pre(Pn, -wordSize));
+      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+    }
+
+    void post1() {
+      block_comment("post1");
+
+      // MACC(Ra, Rb, t0, t1, t2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      umulh(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
+      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+
+      // *Pm = Rm = t0 * inv;
+      mul(Rm, t0, inv);
+      str(Rm, Address(Pm));
+
+      // MACC(Rm, Rn, t0, t1, t2);
+      // t0 = t1; t1 = t2; t2 = 0;
+      umulh(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+      // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
+      {
+        mul(Rlo_mn, Rm, Rn);
+        add(Rlo_mn, t0, Rlo_mn);
+        Label ok;
+        cbz(Rlo_mn, ok); {
+          stop("broken Montgomery multiply");
+        } bind(ok);
+      }
+#endif
+      // We have very carefully set things up so that
+      // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
+      // the lower half of Rm * Rn because we know the result already:
+      // it must be -t0.  t0 + (-t0) must generate a carry iff
+      // t0 != 0.  So, rather than do a mul and an adds we just set
+      // the carry flag iff t0 is nonzero.
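+      // For example, with 64-bit words: if t0 == 5, then 5 + (2^64 - 5)
+      // wraps and produces a carry, while 0 + 0 does not.  The subs below
+      // computes t0 - 1, which borrows exactly when t0 == 0, so it leaves
+      // the carry flag set exactly when t0 != 0.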
+ // + // mul(Rlo_mn, Rm, Rn); + // adds(zr, t0, Rlo_mn); + subs(zr, t0, 1); // Set carry iff t0 is nonzero + adcs(t0, t1, Rhi_mn); + adc(t1, t2, zr); + mov(t2, zr); + } + + void pre2(RegisterOrConstant i, RegisterOrConstant len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + if (i.is_register()) { + sub(Rj, i.as_register(), len); + } else { + mov(Rj, i.as_constant()); + sub(Rj, Rj, len); + } + // Rj == i-len + + lea(Pa, Address(Pa_base, Rj, Address::uxtw(LogBytesPerWord))); + lea(Pb, Address(Pb_base, len, Address::uxtw(LogBytesPerWord))); + lea(Pm, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord))); + lea(Pn, Address(Pn_base, len, Address::uxtw(LogBytesPerWord))); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + ldr(Ra, pre(Pa, wordSize)); + ldr(Rb, pre(Pb, -wordSize)); + ldr(Rm, pre(Pm, wordSize)); + ldr(Rn, pre(Pn, -wordSize)); + + mov(Rhi_mn, zr); + mov(Rlo_mn, zr); + } + + void post2(RegisterOrConstant i, RegisterOrConstant len) { + block_comment("post2"); + if (i.is_constant()) { + mov(Rj, i.as_constant()-len.as_constant()); + } else { + sub(Rj, i.as_register(), len); + } + + adds(t0, t0, Rlo_mn); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + str(t0, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord))); + + // t0 = t1; t1 = t2; t2 = 0; + adcs(t0, t1, Rhi_mn); // The pending m*n, high part + adc(t1, t2, zr); + mov(t2, zr); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. + void normalize(RegisterOrConstant len) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = t1, i = t2; // Re-use registers; we're done with them now + cbz(t0, post); { + bind(again); { + mov(i, zr); + mov(cnt, len); + ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + subs(zr, zr, zr); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + sbcs(Rm, Rm, Rn); + str(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + add(i, i, 1); + ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord))); + ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord))); + sub(cnt, cnt, 1); + } cbnz(cnt, loop); + sbc(t0, t0, zr); + } cbnz(t0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. 
+ // Increments d to end of copied memory + // Destroys tmp1, tmp2 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < r19 && tmp2 < r19, "register corruption"); + + lea(s, Address(s, len, Address::uxtw(LogBytesPerWord))); + mov(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + sub(s, d, len, ext::uxtw, LogBytesPerWord); + } + // where + void reverse1(Register d, Register s, Register tmp) { + ldr(tmp, pre(s, -wordSize)); + ror(tmp, tmp, 32); + str(tmp, post(d, wordSize)); + } + + void step_squaring() { + // An extra ACC + step(); + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + } + + void last_squaring(RegisterOrConstant i) { + Label dont; + // if ((i & 1) == 0) { + tbnz(i.as_register(), 0, dont); { + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + umulh(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_ab, Rlo_ab, t0, t1, t2); + } bind(dont); + } + + void extra_step_squaring() { + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n + + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + umulh(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + ldr(Rm, pre(Pm, wordSize)); + ldr(Rn, pre(Pn, -wordSize)); + } + + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n + + // *Pm = Rm = t0 * inv; + mul(Rm, t0, inv); + str(Rm, Address(Pm)); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + umulh(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, t0, Rlo_mn); + Label ok; + cbz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // adds(zr, t0, Rlo_mn); + subs(zr, t0, 1); // Set carry iff t0 is nonzero + adcs(t0, t1, Rhi_mn); + adc(t1, t2, zr); + mov(t2, zr); + } + + void acc(Register Rhi, Register Rlo, + Register t0, Register t1, Register t2) { + adds(t0, t0, Rlo); + adcs(t1, t1, Rhi); + adc(t2, t2, zr); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements b + * c_rarg2 - int array elements n (the modulus) + * c_rarg3 - int length + * c_rarg4 - int inv + * c_rarg5 - int array elements m (the result) + * + * Inputs for squaring: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + cbzw(Rlen, nothing); + + enter(); + + // Make room. 
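+    // Rlen is still the length in ints at this point, so the scratch area
+    // carved out of the stack below is Rlen * 4 * sizeof (jint) bytes --
+    // room for reversed copies of the inputs and the result.  The check
+    // against 512 caps it at the 8192 bytes named in the "argh" stop above.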
+ cmpw(Rlen, 512); + br(Assembler::HI, argh); + sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint))); + andr(sp, Ra, -2 * wordSize); + + lsrw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ldr(Rn, Address(Pn_base, 0)); + mul(Rlo_mn, Rn, inv); + cmp(Rlo_mn, -1); + Label ok; + br(EQ, ok); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + mov(Pm_base, Ra); + + mov(t0, zr); + mov(t1, zr); + mov(t2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, zr); { + Label loop, end; + cmpw(Ri, Rlen); + br(Assembler::GE, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + movw(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + addw(Ri, Ri, 1); + cmpw(Ri, Rlen); + br(Assembler::LT, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + cmpw(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::GE, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + lslw(Rj, Rlen, 1); + subw(Rj, Rj, Ri); + subw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addw(Ri, Ri, 1); + cmpw(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::LT, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + mov(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + bind(nothing); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // *Pm = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; 
+ // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + + /** + * Fast Montgomery squaring. This uses asymptotically 25% fewer + * multiplies than Montgomery multiplication so it should be up to + * 25% faster. However, its loop control is more complex and it + * may actually run slower on some machines. + * + * Arguments: + * + * Inputs: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_square() { + Label argh; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + // Make room. + cmpw(Rlen, 512); + br(Assembler::HI, argh); + sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint))); + andr(sp, Ra, -2 * wordSize); + + lsrw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + + mov(Pm_base, Ra); + + mov(t0, zr); + mov(t1, zr); + mov(t2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, zr); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen); + br(Assembler::GE, end); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + add(Rj, Ri, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + lsr(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + add(Ri, Ri, 1); + cmp(Ri, Rlen); + br(Assembler::LT, loop); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen, Assembler::LSL, 1); + br(Assembler::GE, end); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + add(Ri, Ri, 1); + cmp(Ri, Rlen, Assembler::LSL, 1); + + br(Assembler::LT, loop); + bind(end); + block_comment("} // i"); + } + + normalize(Rlen); + + mov(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[], + // unsigned long 
Pm_base[], unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pa_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = (i+1)/2; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = i/2; + // assert(iters == i-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // *Pm = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int start = i-len+1; + // int end = start + (len - start)/2; + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pa_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = (2*len-i-1)/2; + // assert(iters == end-start, "must be"); + // for (j = start; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = (2*len-i)/2; + // assert(iters == len-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points + + // entry points that exist in all platforms Note: This is code + // that could be shared among different platforms - however the + // benefit seems to be smaller than the disadvantage of having a + // much more complicated generator structure. See also comment in + // stubRoutines.hpp. + + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = + generate_call_stub(StubRoutines::_call_stub_return_address); + + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + // Build this early so it's available for the interpreter. 
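+    // (The interpreter's stack-overflow check, generate_stack_overflow_check()
+    // in templateInterpreter_aarch64.cpp, jumps to this shared runtime stub,
+    // which is why it must exist before the interpreter itself is generated.)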
+ StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_StackOverflowError)); + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } + } + + void generate_all() { + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); + StubRoutines::_throw_AbstractMethodError_entry = + generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_AbstractMethodError)); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = + generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_IncompatibleClassChangeError)); + + StubRoutines::_throw_NullPointerException_at_call_entry = + generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_NullPointerException_at_call)); + + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + // We use generate_multiply() rather than generate_square() + // because it's faster for the sizes of modulus we care about. + StubRoutines::_montgomerySquare = g.generate_multiply(); + } + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); + } + + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + + // Safefetch stubs. 
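+    // SafeFetch32/SafeFetchN let VM-internal code (e.g. error reporting)
+    // read from an address that may not be mapped: the stub returns either
+    // *adr or, if the access faults, the caller-supplied error value.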
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/stubRoutines_aarch64.cpp 2021-01-25 19:31:57.923699160 +0000 @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. 
+ +address StubRoutines::aarch64::_get_previous_fp_entry = NULL; +address StubRoutines::aarch64::_get_previous_sp_entry = NULL; + +address StubRoutines::aarch64::_f2i_fixup = NULL; +address StubRoutines::aarch64::_f2l_fixup = NULL; +address StubRoutines::aarch64::_d2i_fixup = NULL; +address StubRoutines::aarch64::_d2l_fixup = NULL; +address StubRoutines::aarch64::_float_sign_mask = NULL; +address StubRoutines::aarch64::_float_sign_flip = NULL; +address StubRoutines::aarch64::_double_sign_mask = NULL; +address StubRoutines::aarch64::_double_sign_flip = NULL; +address StubRoutines::aarch64::_zero_longs = NULL; + +/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h + */ +juint StubRoutines::aarch64::_crc_table[] + __attribute__ ((aligned(4096))) = +{ + // Table 0 + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 
0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL, + + // Table 1 + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 
0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL, + + // Table 2 + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 
0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL, + + // Table 3 + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 
0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL, + // Constants for Neon CRC232 implementation + // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed + // k4 = 0xED627DAE = x^256 mod poly - bit reversed + 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 + 0xED78D502UL, 0x62EDAE7DUL, // byte swap + 0x02D578EDUL, 0x7DAEED62UL, // word swap + 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap +}; --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp 2021-01-25 19:31:58.436704552 +0000 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP +#define CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. 
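+//
+// returns_to_call_stub() below lets frame-walking code recognize the call
+// stub's return address, and code_size1/code_size2 size the code buffers
+// for the initial stubs (generate_initial) and for the full set
+// (generate_all) respectively.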
+ +static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) + code_size2 = 22000 // simply increase if too small (assembler will crash if too small) +}; + +class aarch64 { + friend class StubGenerator; + + private: + static address _get_previous_fp_entry; + static address _get_previous_sp_entry; + + static address _f2i_fixup; + static address _f2l_fixup; + static address _d2i_fixup; + static address _d2l_fixup; + + static address _float_sign_mask; + static address _float_sign_flip; + static address _double_sign_mask; + static address _double_sign_flip; + + static address _zero_longs; + + public: + + static address get_previous_fp_entry() + { + return _get_previous_fp_entry; + } + + static address get_previous_sp_entry() + { + return _get_previous_sp_entry; + } + + static address f2i_fixup() + { + return _f2i_fixup; + } + + static address f2l_fixup() + { + return _f2l_fixup; + } + + static address d2i_fixup() + { + return _d2i_fixup; + } + + static address d2l_fixup() + { + return _d2l_fixup; + } + + static address float_sign_mask() + { + return _float_sign_mask; + } + + static address float_sign_flip() + { + return _float_sign_flip; + } + + static address double_sign_mask() + { + return _double_sign_mask; + } + + static address double_sign_flip() + { + return _double_sign_flip; + } + + static address get_zero_longs() + { + return _zero_longs; + } + + private: + static juint _crc_table[]; + +}; + +#endif // CPU_AARCH64_VM_STUBROUTINES_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.hpp 2021-01-25 19:31:58.954709996 +0000 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP +#define CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP + + protected: + +void generate_fixed_frame(bool native_call); + + // address generate_asm_interpreter_entry(bool synchronized); + +#endif // CPU_AARCH64_VM_TEMPLATEINTERPRETERGENERATOR_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp 2021-01-25 19:31:59.416714852 +0000 @@ -0,0 +1,2065 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2013, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "interpreter/bytecodeTracer.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include <sys/types.h>
+
+#ifndef PRODUCT
+#include "oops/method.hpp"
+#endif // !PRODUCT
+
+#define __ _masm->
+
+#ifndef CC_INTERP
+
+//-----------------------------------------------------------------------------
+
+extern "C" void entry(CodeBuffer*);
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+  address entry = __ pc();
+
+#ifdef ASSERT
+  {
+    Label L;
+    __ ldr(rscratch1, Address(rfp,
+                       frame::interpreter_frame_monitor_block_top_offset *
+                       wordSize));
+    __ mov(rscratch2, sp);
+    __ cmp(rscratch1, rscratch2); // maximal rsp for current rfp (stack
+                                  // grows negative)
+    __ br(Assembler::HS, L);      // check if frame is complete
+    __ stop ("interpreter frame not set up");
+    __ bind(L);
+  }
+#endif // ASSERT
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted
+  __ restore_bcp();
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // throw exception
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(
+        const char* name) {
+  address entry = __ pc();
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+  // ???
convention: expect aberrant index in register r1 + __ movw(c_rarg2, r1); + __ mov(c_rarg1, (address)name); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ArrayIndexOutOfBoundsException), + c_rarg1, c_rarg2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // object is at TOS + __ pop(c_rarg1); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ClassCastException), + c_rarg1); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + if (pass_oop) { + // object is at TOS + __ pop(c_rarg2); + } + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + __ lea(c_rarg1, Address((address)name)); + if (pass_oop) { + __ call_VM(r0, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + create_klass_exception), + c_rarg1, c_rarg2); + } else { + // kind of lame ExternalAddress can't take NULL because + // external_word_Relocation will assert. + if (message != NULL) { + __ lea(c_rarg2, Address((address)message)); + } else { + __ mov(c_rarg2, NULL_WORD); + } + __ call_VM(r0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + c_rarg1, c_rarg2); + } + // throw exception + __ b(address(Interpreter::throw_exception_entry())); + return entry; +} + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + // NULL last_sp until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that esp is now tos until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + + // Pop N words from the stack + __ get_cache_and_index_at_bcp(r1, r2, 1, index_size); + __ ldr(r1, Address(r1, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andr(r1, r1, ConstantPoolCacheEntry::parameter_size_mask); + + __ add(esp, esp, r1, Assembler::LSL, 3); + + // Restore machine SP + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + + __ get_dispatch(); + __ dispatch_next(state, step); + + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + __ 
get_dispatch(); + + // Calculate stack limit + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + + (EnableInvokeDynamic ? 2 : 0)); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); + __ andr(sp, rscratch1, -16); + + // Restore expression stack pointer + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // NULL last_sp until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + // handle exceptions + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + __ dispatch_next(state, step); + return entry; +} + + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(r0); break; + case T_CHAR : __ uxth(r0, r0); break; + case T_BYTE : __ sxtb(r0, r0); break; + case T_SHORT : __ sxth(r0, r0); break; + case T_INT : __ uxtw(r0, r0); break; // FIXME: We almost certainly don't need this + case T_LONG : /* nothing to do */ break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + // retrieve result from frame + __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); + // and verify it + __ verify_oop(r0); + break; + default : ShouldNotReachHere(); + } + __ ret(lr); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ membar(Assembler::AnyAny); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// rmethod: method +// +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
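+    // If the method has an MDO we bump the invocation counter stored there,
+    // otherwise the one in MethodCounters; either way the masked increment
+    // branches to 'overflow' whenever the low Tier0InvokeNotifyFreqLog bits
+    // of the count wrap to zero.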
+ __ ldr(r0, Address(rmethod, Method::method_data_offset())); + __ cbz(r0, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(r0, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); + __ b(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(rmethod, rscratch2, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); + __ bind(done); + } else { + const Address backedge_counter(rscratch2, + MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + + __ get_method_counters(rmethod, rscratch2, done); + + if (ProfileInterpreter) { // %%% Merge this into MethodData* + __ ldrw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + __ addw(r1, r1, 1); + __ strw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ldrw(r1, invocation_counter); + __ ldrw(r0, backedge_counter); + + __ addw(r1, r1, InvocationCounter::count_increment); + __ andw(r0, r0, InvocationCounter::count_mask_value); + + __ strw(r1, invocation_counter); + __ addw(r0, r0, r1); // add both counters + + // profile_method is non-null only for interpreted method so + // profile_method != NULL == !native_call + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + unsigned long offset; + __ adrp(rscratch2, ExternalAddress((address)&InvocationCounter::InterpreterProfileLimit), + offset); + __ ldrw(rscratch2, Address(rscratch2, offset)); + __ cmp(r0, rscratch2); + __ br(Assembler::LT, *profile_method_continue); + + // if no method data exists, go to profile_method + __ test_method_data_pointer(rscratch2, *profile_method); + } + + { + unsigned long offset; + __ adrp(rscratch2, + ExternalAddress((address)&InvocationCounter::InterpreterInvocationLimit), + offset); + __ ldrw(rscratch2, Address(rscratch2, offset)); + __ cmpw(r0, rscratch2); + __ br(Assembler::HS, *overflow); + } + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { + + // Asm interpreter on entry + // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] + // Everything as it was on entry + + // InterpreterRuntime::frequency_counter_overflow takes two + // arguments, the first (thread) is passed by call_VM, the second + // indicates if the counter overflow occurs at a backwards branch + // (NULL bcp). We pass zero for it. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + __ mov(c_rarg1, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + c_rarg1); + + __ b(*do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. 
+// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// r3: number of additional locals this frame needs (what we must check) +// rmethod: Method* +// +// Kills: +// r0 +void InterpreterGenerator::generate_stack_overflow_check(void) { + + // monitor entry size: see picture of stack set + // (generate_method_entry) and frame_amd64.hpp + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved rbp through expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + // + // Note that we use SUBS rather than CMP here because the immediate + // field of this instruction may overflow. SUBS can cope with this + // because it is a macro that will expand to some number of MOV + // instructions and a register operation. + __ subs(rscratch1, r3, (page_size - overhead_size) / Interpreter::stackElementSize); + __ br(Assembler::LS, after_frame_check); + + // compute rsp as if this were going to be the last frame on + // the stack before the red zone + + const Address stack_base(rthread, Thread::stack_base_offset()); + const Address stack_size(rthread, Thread::stack_size_offset()); + + // locals + overhead, in bytes + __ mov(r0, overhead_size); + __ add(r0, r0, r3, Assembler::LSL, Interpreter::logStackElementSize); // 2 slots per parameter. + + __ ldr(rscratch1, stack_base); + __ ldr(rscratch2, stack_size); + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ cbnz(rscratch1, stack_base_okay); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ cbnz(rscratch2, stack_size_okay); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ sub(rscratch1, rscratch1, rscratch2); // Stack limit + __ add(r0, r0, rscratch1); + + // Use the maximum number of pages we might bang. + const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages : + (StackRedPages+StackYellowPages); + + // add in the red and yellow zone sizes + __ add(r0, r0, max_pages * page_size * 2); + + // check against the current stack bottom + __ cmp(sp, r0); + __ br(Assembler::HI, after_frame_check); + + // Remove the incoming args, peeling the machine SP back to where it + // was in the caller. This is not strictly necessary, but unless we + // do so the stack frame may have a garbage FP; this ensures a + // correct call stack that we can always unwind. The ANDR should be + // unnecessary because the sender SP in r13 is always aligned, but + // it doesn't hurt. + __ andr(sp, r13, -16); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
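Restating the limit arithmetic above in plain C++ (a sketch only; the frame constants are treated as opaque inputs, and wordSize is 8 on AArch64):

    #include <cstdint>
    #include <cstddef>

    // 'overhead_bytes' stands for -(interpreter_frame_initial_sp_offset * wordSize)
    // plus one monitor entry; 'guard_bytes' for max_pages * page_size * 2.
    static bool frame_fits(uintptr_t sp,            // current machine SP
                           uintptr_t stack_base,    // thread stack base (high address)
                           size_t    stack_size,    // thread stack size in bytes
                           size_t    extra_local_slots,
                           size_t    overhead_bytes,
                           size_t    guard_bytes,
                           size_t    page_size) {
      size_t needed = overhead_bytes + extra_local_slots * 8;
      if (needed <= page_size) {
        return true;                                // fits in the already-banged first page
      }
      uintptr_t limit = (stack_base - stack_size)   // lowest usable stack address
                      + needed + guard_bytes;       // stay clear of the red/yellow zones
      return sp > limit;                            // matches the HI branch above
    }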
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// +// Args: +// rmethod: Method* +// rlocals: locals +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterGenerator::lock_method(void) { + // synchronize method + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address monitor_block_top( + rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::NE, L); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + + // get synchronization object + { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + Label done; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_STATIC); + // get receiver (assume this is frequent case) + __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0))); + __ br(Assembler::EQ, done); + __ ldr(r0, Address(rmethod, Method::const_offset())); + __ ldr(r0, Address(r0, ConstMethod::constants_offset())); + __ ldr(r0, Address(r0, + ConstantPool::pool_holder_offset_in_bytes())); + __ ldr(r0, Address(r0, mirror_offset)); + +#ifdef ASSERT + { + Label L; + __ cbnz(r0, L); + __ stop("synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + // add space for monitor & lock + __ sub(sp, sp, entry_size); // add space for a monitor entry + __ sub(esp, esp, entry_size); + __ mov(rscratch1, esp); + __ str(rscratch1, monitor_block_top); // set new monitor block top + // store object + __ str(r0, Address(esp, BasicObjectLock::obj_offset_in_bytes())); + __ mov(c_rarg1, esp); // object address + __ lock_object(c_rarg1); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. 
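As a reading aid for generate_fixed_frame below, the stp sequence lays out the fixed part of the frame roughly like this, expressed as word offsets from the freshly decremented sp. The authoritative values are the frame::interpreter_frame_*_offset constants (defined relative to rfp), so take these indices as an unofficial sketch:

    // Word offsets from the new sp, as read from the stp sequence below.
    enum FixedFrameSlotSketch {
      kSavedEsp      = 0,  // esp at frame setup (expression-stack bottom)
      kSavedBcp      = 1,  // rbcp, zero for native frames
      kSavedLocals   = 2,  // rlocals
      kSavedCpCache  = 3,  // rcpool (constant pool cache)
      kSavedMdp      = 4,  // method data pointer, or zero
      kSavedMethod   = 5,  // Method*
      kLastSp        = 6,  // last_sp, NULLed here
      kSenderSp      = 7,  // sender sp (r13)
      kSavedFp       = 8,  // saved rfp; the new rfp points at this slot
      kReturnAddress = 9   // saved lr
      // native frames reserve two additional zeroed slots (12 words in total)
    };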
+// +// Args: +// lr: return address +// rmethod: Method* +// rlocals: pointer to locals +// rcpool: cp cache +// stack_pointer: previous sp +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // initialize fixed part of activation frame + if (native_call) { + __ sub(esp, sp, 12 * wordSize); + __ mov(rbcp, zr); + __ stp(esp, zr, Address(__ pre(sp, -12 * wordSize))); + // add 2 zero-initialized slots for native calls + __ stp(zr, zr, Address(sp, 10 * wordSize)); + } else { + __ sub(esp, sp, 10 * wordSize); + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod + __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase + __ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize))); + } + + if (ProfileInterpreter) { + Label method_data_continue; + __ ldr(rscratch1, Address(rmethod, Method::method_data_offset())); + __ cbz(rscratch1, method_data_continue); + __ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset()))); + __ bind(method_data_continue); + __ stp(rscratch1, rmethod, Address(sp, 4 * wordSize)); // save Method* and mdp (method data pointer) + } else { + __ stp(zr, rmethod, Address(sp, 4 * wordSize)); // save Method* (no mdp) + } + + __ ldr(rcpool, Address(rmethod, Method::const_offset())); + __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); + __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes())); + __ stp(rlocals, rcpool, Address(sp, 2 * wordSize)); + + __ stp(rfp, lr, Address(sp, 8 * wordSize)); + __ lea(rfp, Address(sp, 8 * wordSize)); + + // set sender sp + // leave last_sp as null + __ stp(zr, r13, Address(sp, 6 * wordSize)); + + // Move SP out of the way + if (! native_call) { + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + + (EnableInvokeDynamic ? 2 : 0)); + __ sub(rscratch1, sp, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + } +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Call an accessor method (assuming it is resolved, otherwise drop +// into vanilla (slow path) entry +address InterpreterGenerator::generate_accessor_entry(void) { + return NULL; +} + +// Method entry for java.lang.ref.Reference.get. +address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. 
If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry. + // + // rmethod: Method* + // r13: senderSP must preserve for slow path, set SP to it on fast path + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + const Register local_0 = c_rarg0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldr(local_0, Address(esp, 0)); + __ cbz(local_0, slow_path); + + // Load the value of the referent field. + const Address field_address(local_0, referent_offset); + __ load_heap_oop(local_0, field_address); + + __ mov(r19, r13); // Move senderSP to a callee-saved register + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ enter(); // g1_write may call runtime + __ g1_write_barrier_pre(noreg /* obj */, + local_0 /* pre_val */, + rthread /* thread */, + rscratch2 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + // areturn + __ andr(sp, r19, -16); // done with stack + __ ret(lr); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // r13: senderSP must preserved for slow path + // esp: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); + __ cbnz(rscratch1, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
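The intrinsic body that follows folds a single byte into the running CRC exactly as java.util.zip.CRC32.update(int, int) specifies. A standalone sketch of that step, using the textbook single-table form of the zlib polynomial (the table layout behind StubRoutines::crc_table_addr() may differ; the two complement steps correspond to the ornw(crc, zr, crc) instructions in the generated code):

    #include <cstdint>

    static uint32_t crc_table[256];

    static void crc_table_init() {
      for (uint32_t i = 0; i < 256; i++) {
        uint32_t c = i;
        for (int k = 0; k < 8; k++)
          c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
        crc_table[i] = c;
      }
    }

    // Equivalent of CRC32.update(int crc, int b) for one byte.
    static uint32_t crc32_update_byte(uint32_t crc, uint8_t b) {
      crc = ~crc;                                        // first complement
      crc = crc_table[(crc ^ b) & 0xFFu] ^ (crc >> 8);   // table-driven fold
      return ~crc;                                       // second complement
    }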
+ + // Load parameters + const Register crc = c_rarg0; // crc + const Register val = c_rarg1; // source java byte value + const Register tbl = c_rarg2; // scratch + + // Arguments are reversed on java expression stack + __ ldrw(val, Address(esp, 0)); // byte value + __ ldrw(crc, Address(esp, wordSize)); // Initial CRC + + __ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset); + __ add(tbl, tbl, offset); + + __ ornw(crc, zr, crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ ornw(crc, zr, crc); // ~crc + + // result in c_rarg0 + + __ andr(sp, r13, -16); + __ ret(lr); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod,: Method* + // r13: senderSP must preserved for slow path + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); + __ cbnz(rscratch1, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register off = len; // offset (never overlaps with 'len') + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ldr(buf, Address(esp, 2*wordSize)); // long buf + __ ldrw(off, Address(esp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldrw(crc, Address(esp, 4*wordSize)); // Initial CRC + } else { + __ ldr(buf, Address(esp, 2*wordSize)); // byte[] array + __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ldrw(off, Address(esp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldrw(crc, Address(esp, 3*wordSize)); // Initial CRC + } + // Can now load 'len' since we're finished with 'off' + __ ldrw(len, Address(esp, 0x0)); // Length + + __ andr(sp, r13, -16); // Restore the caller's SP + + // We are frameless so we can just jump to the stub. + __ b(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32())); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + if (UseStackBanging) { + const int start_page = native_call ? 
StackShadowPages : 1; + const int page_size = os::vm_page_size(); + for (int pages = start_page; pages <= StackShadowPages ; pages++) { + __ sub(rscratch2, sp, pages*page_size); + __ str(zr, Address(rscratch2)); + } + } +} + + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // r1: Method* + // rscratch1: sender sp + + address entry_point = __ pc(); + + const Address constMethod (rmethod, Method::const_offset()); + const Address access_flags (rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r2, ConstMethod:: + size_of_parameters_offset()); + + // get parameter size (always needed) + __ ldr(r2, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack + + // rmethod: Method* + // r2: size of parameters + // rscratch1: sender sp + + // for natives the size of locals is zero + + // compute beginning of parameters (rlocals) + __ add(rlocals, esp, r2, ext::uxtx, 3); + __ add(rlocals, rlocals, -wordSize); + + // Pull SP back to minimum size: this avoids holes in the stack + __ andr(sp, esp, -16); + + // initialize fixed part of activation frame + generate_fixed_frame(true); + + // make sure method is native & not abstract +#ifdef ASSERT + __ ldrw(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ br(Assembler::NE, L); + __ stop("tried to execute non-native method as native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ br(Assembler::EQ, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. + + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag + __ strb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
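As a reminder of the locals layout set up near the top of this native entry (rlocals = esp + parameters*wordSize - wordSize): with no true locals, the incoming parameters are the locals, and the first parameter occupies the highest-addressed slot. A pointer-arithmetic sketch of the same convention:

    #include <cstdint>

    // rlocals for a native entry: the last parameter sits at esp, the first at
    // the highest address, which is where rlocals points.
    static intptr_t* native_locals_base(intptr_t* esp, int param_words) {
      return esp + param_words - 1;        // esp + param_words*wordSize - wordSize
    }

    // Interpreter local slot i (== parameter i here) then lives i words below it.
    static intptr_t* native_local_slot(intptr_t* locals_base, int i) {
      return locals_base - i;
    }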
+ if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top(rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(esp, rscratch1); + __ br(Assembler::EQ, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + // work registers + const Register t = r17; + const Register result_handler = r19; + + // allocate space for parameters + __ ldr(t, Address(rmethod, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + + __ sub(rscratch1, esp, t, ext::uxtx, Interpreter::logStackElementSize); + __ andr(sp, rscratch1, -16); + __ mov(esp, rscratch1); + + // get signature handler + { + Label L; + __ ldr(t, Address(rmethod, Method::signature_handler_offset())); + __ cbnz(t, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + rmethod); + __ ldr(t, Address(rmethod, Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, + "adjust this code"); + + // The generated handlers do not touch rmethod (the method). + // However, large signatures cannot be cached and are generated + // each time here. The slow-path generator can do a GC on return, + // so we must reload it after the call. 
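The signature-handler lookup above is a resolve-once-and-cache pattern: use the cached handler if the method already has one, otherwise call into the runtime to install it and reload. A schematic C++ sketch (struct and function names are stand-ins, not HotSpot types; the real cache lives in Method and is filled by InterpreterRuntime::prepare_native_call):

    // Stand-in types for the sketch only.
    struct MethodSketch { void* signature_handler; };

    static void* signature_handler_for(MethodSketch* m,
                                       void (*resolve)(MethodSketch*)) {
      if (m->signature_handler == nullptr) {
        resolve(m);                    // may GC; callers must re-fetch Method* afterwards
      }
      return m->signature_handler;     // reloaded after the call, as in the code above
    }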
+ __ blr(t); + __ get_method(rmethod); // slow path can do a GC, reload rmethod + + + // result handler is in r0 + // set result handler + __ mov(result_handler, r0); + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ldrw(t, Address(rmethod, Method::access_flags_offset())); + __ tst(t, JVM_ACC_STATIC); + __ br(Assembler::EQ, L); + // get mirror + __ ldr(t, Address(rmethod, Method::const_offset())); + __ ldr(t, Address(t, ConstMethod::constants_offset())); + __ ldr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes())); + __ ldr(t, Address(t, mirror_offset)); + // copy mirror into activation frame + __ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // pass handle to mirror + __ add(c_rarg1, rfp, frame::interpreter_frame_oop_temp_offset * wordSize); + __ bind(L); + } + + // get native function entry point in r10 + { + Label L; + __ ldr(r10, Address(rmethod, Method::native_function_offset())); + address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ mov(rscratch2, unsatisfied); + __ ldr(rscratch2, rscratch2); + __ cmp(r10, rscratch2); + __ br(Assembler::NE, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + rmethod); + __ get_method(rmethod); + __ ldr(r10, Address(rmethod, Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset())); + + // Set the last Java PC in the frame anchor to be the return address from + // the call to the native method: this will allow the debugger to + // generate an accurate stack trace. + Label native_return; + __ set_last_Java_frame(esp, rfp, native_return, rscratch1); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ldrw(t, Address(rthread, JavaThread::thread_state_offset())); + __ cmp(t, _thread_in_Java); + __ br(Assembler::EQ, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Change state to native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + // Call the native method. + __ blr(r10); + __ bind(native_return); + __ maybe_isb(); + __ get_method(rmethod); + // result potentially in r0 or v0 + + // make room for the pushes we're about to do + __ sub(rscratch1, esp, 4 * wordSize); + __ andr(sp, rscratch1, -16); + + // NOTE: The order of these pushes is known to frame::interpreter_frame_result + // in order to extract the result of a method call. If the order of these + // pushes change or anything else is added to the stack then the code in + // interpreter_frame_result must also change. + __ push(dtos); + __ push(ltos); + + // change thread state + __ mov(rscratch1, _thread_in_native_trans); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + if (os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ dsb(Assembler::SY); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
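Around the native call, the thread walks through a small state machine: in_Java to in_native for the call itself, then in_native_trans on the way back, where the safepoint/suspend check happens before re-entering in_Java. The sketch below is a conceptual model only (names and types are illustrative, not the JavaThreadState machinery); what matters is the ordering it enforces: publish the state change, then read the safepoint flag.

    #include <atomic>

    enum ThreadStateSketch { kInJava, kInNative, kInNativeTrans };

    static void return_from_native(std::atomic<int>& state,
                                   const std::atomic<bool>& safepoint_pending,
                                   const std::atomic<int>& suspend_flags,
                                   void (*check_special_condition)()) {
      state.store(kInNativeTrans, std::memory_order_release);
      // Plays the role of the DSB (UseMembar) or the serialization-page write:
      // the VM thread must see the state change before we read its flag.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      if (safepoint_pending.load() || suspend_flags.load() != 0) {
        check_special_condition();     // may block at a safepoint or handle suspension
      }
      state.store(kInJava, std::memory_order_release);
    }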
+ __ serialize_memory(rthread, rscratch2); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + { + unsigned long offset; + __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset); + __ ldrw(rscratch2, Address(rscratch2, offset)); + } + assert(SafepointSynchronize::_not_synchronized == 0, + "SafepointSynchronize::_not_synchronized"); + Label L; + __ cbnz(rscratch2, L); + __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbz(rscratch2, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception + // and forward it and never return here preventing us from + // clearing _last_native_pc down below. Also can't use + // call_VM_leaf either as it will check to see if r13 & r14 are + // preserved and correspond to the bcp/locals pointers. So we do a + // runtime call by hand. + // + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); + __ blr(rscratch2); + __ maybe_isb(); + __ get_method(rmethod); + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + // reset_last_Java_frame + __ reset_last_Java_frame(true); + + // reset handle block + __ ldr(t, Address(rthread, JavaThread::active_handles_offset())); + __ str(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + + // If result is an oop unbox and store it in frame where gc will see it + // and result handler will pick it up + + { + Label no_oop, not_weak, store_result; + __ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ cmp(t, result_handler); + __ br(Assembler::NE, no_oop); + // Unbox oop result, e.g. JNIHandles::resolve result. + __ pop(ltos); + __ cbz(r0, store_result); // Use NULL as-is. + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); + __ tbz(r0, 0, not_weak); // Test for jweak tag. + // Resolve jweak. + __ ldr(r0, Address(r0, -JNIHandles::weak_tag_value)); +#if INCLUDE_ALL_GCS + if (UseG1GC) { + __ enter(); // Barrier may call runtime. + __ g1_write_barrier_pre(noreg /* obj */, + r0 /* pre_val */, + rthread /* thread */, + t /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + } +#endif // INCLUDE_ALL_GCS + __ b(store_result); + __ bind(not_weak); + // Resolve (untagged) jobject. + __ ldr(r0, Address(r0, 0)); + __ bind(store_result); + __ str(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + + { + Label no_reguard; + __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset()))); + __ ldrb(rscratch1, Address(rscratch1)); + __ cmp(rscratch1, JavaThread::stack_guard_yellow_disabled); + __ br(Assembler::NE, no_reguard); + + __ pusha(); // XXX only save smashed registers + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ blr(rscratch2); + __ popa(); // XXX only restore smashed registers + __ bind(no_reguard); + } + + // The method register is junk from after the thread_in_native transition + // until here. Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. 
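Stepping back to the oop-unboxing block above: weak and strong JNI handles are distinguished by the low tag bit (the STATIC_ASSERT pins weak_tag_mask to 1). In plain C++ terms the resolution logic is roughly the following sketch (oop is a simplified stand-in, and the generated code additionally applies a G1 SATB pre-barrier to the weak case):

    #include <cstdint>

    typedef void* oop;   // stand-in for the sketch

    static oop resolve_returned_handle(uintptr_t handle) {
      if (handle == 0)   return nullptr;               // NULL result stays NULL
      if (handle & 1u)   return *(oop*)(handle - 1);   // jweak: strip tag, then load
      return *(oop*)handle;                            // strong (untagged) jobject
    }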
+ __ get_method(rmethod); + + // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> + // rbcp == code_base() + __ ldr(rbcp, Address(rmethod, Method::const_offset())); // get ConstMethod* + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); // get codebase + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // Note: At some point we may want to unify this with the code + // used in call_VM_base(); i.e., we should use the + // StubRoutines::forward_exception code. For now this doesn't work + // here because the rsp is not correctly set at this point. + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ ldrw(t, Address(rmethod, Method::access_flags_offset())); + __ tst(t, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + // the code below should be shared with interpreter macro + // assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object + // has not been unlocked by an explicit monitorexit bytecode. + + // monitor expect in c_rarg1 for slow unlock path + __ lea (c_rarg1, Address(rfp, // address of first monitor + (intptr_t)(frame::interpreter_frame_initial_sp_offset * + wordSize - sizeof(BasicObjectLock)))); + + __ ldr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ cbnz(t, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg1); + } + __ bind(L); + } + + // jvmti support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). 
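One more note on the unlock path above: for a synchronized native method the single BasicObjectLock sits at a fixed offset just below interpreter_frame_initial_sp_offset, and a NULL obj field means an explicit monitorexit already released it, in which case IllegalMonitorStateException is thrown instead of unlocking. A sketch with simplified stand-in types:

    #include <cstdint>
    #include <cstddef>

    struct BasicObjectLockSketch { void* displaced_header; void* obj; };

    // 'initial_sp_offset_bytes' stands for
    // frame::interpreter_frame_initial_sp_offset * wordSize.
    static BasicObjectLockSketch* first_monitor(uint8_t* fp,
                                                ptrdiff_t initial_sp_offset_bytes) {
      return (BasicObjectLockSketch*)
             (fp + initial_sp_offset_bytes - sizeof(BasicObjectLockSketch));
    }

    static bool still_locked(const BasicObjectLockSketch* monitor) {
      return monitor->obj != nullptr;   // NULL => already unlocked by monitorexit
    }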
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in r0:d0, call result handler to + // restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ blr(result_handler); + + // remove activation + __ ldr(esp, Address(rfp, + frame::interpreter_frame_sender_sp_offset * + wordSize)); // get sender sp + // remove frame anchor + __ leave(); + + // resture sender sp + __ mov(sp, esp); + + __ ret(lr); + + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // rscratch1: sender sp + address entry_point = __ pc(); + + const Address constMethod(rmethod, Method::const_offset()); + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r3, + ConstMethod::size_of_parameters_offset()); + const Address size_of_locals(r3, ConstMethod::size_of_locals_offset()); + + // get parameter size (always needed) + // need to load the const method first + __ ldr(r3, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // r2: size of parameters + + __ load_unsigned_short(r3, size_of_locals); // get size of locals in words + __ sub(r3, r3, r2); // r3 = no. of additional locals + + // see if we've got enough room on the stack for locals plus overhead. + generate_stack_overflow_check(); + + // compute beginning of parameters (rlocals) + __ add(rlocals, esp, r2, ext::uxtx, 3); + __ sub(rlocals, rlocals, wordSize); + + // Make room for locals + __ sub(rscratch1, esp, r3, ext::uxtx, 3); + __ andr(sp, rscratch1, -16); + + // r3 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ ands(zr, r3, r3); + __ br(Assembler::LE, exit); // do nothing if r3 <= 0 + __ bind(loop); + __ str(zr, Address(__ post(rscratch1, wordSize))); + __ sub(r3, r3, 1); // until everything initialized + __ cbnz(r3, loop); + __ bind(exit); + } + + // And the base dispatch table + __ get_dispatch(); + + // initialize fixed part of activation frame + generate_fixed_frame(false); + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ldrw(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ br(Assembler::EQ, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ br(Assembler::EQ, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. 
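For reference, the locals sizing done earlier in this entry works out as follows: only the locals beyond the parameters need fresh slots, those slots are zero-initialized, and the machine SP is re-aligned to 16 bytes. A sketch (wordSize is 8; the generated loop zeroes exactly the extra slots, whereas this sketch also zeroes any alignment padding):

    #include <cstdint>

    static intptr_t* make_room_for_locals(intptr_t* esp,
                                          int size_of_locals,
                                          int size_of_parameters) {
      int extra = size_of_locals - size_of_parameters;   // r3 in the code above
      if (extra < 0) extra = 0;
      uintptr_t sp = (uintptr_t)(esp - extra);           // esp - extra*wordSize
      sp &= ~(uintptr_t)15;                              // andr(sp, rscratch1, -16)
      for (intptr_t* p = (intptr_t*)sp; p < esp; p++) {
        *p = 0;                                          // zero new slots (and padding)
      }
      return (intptr_t*)sp;
    }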
+ + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag + __ strb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top (rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(esp, rscratch1); + __ br(Assembler::EQ, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + // don't think we need this + __ get_method(r1); + __ b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// Entry points +// +// Here we generate the various kind of entries into the interpreter. +// The two main entry type are generic bytecode methods and native +// call method. These both come in synchronized and non-synchronized +// versions but the frame layout they create is very similar. The +// other method entry types are really just special purpose entries +// that are really entry and interpretation all in one. These are for +// trivial methods like accessor, empty, or special math methods. +// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// Arguments: +// +// rmethod: Method* +// +// Stack layout immediately at entry +// +// [ return address ] <--- rsp +// [ parameter n ] +// ... +// [ parameter 1 ] +// [ expression stack ] (caller's java expression stack) + +// Assuming that we don't go to one of the trivial specialized entries +// the stack will look like below when we are ready to execute the +// first bytecode (or call the native routine). The register usage +// will be as the template based interpreter expects (see +// interpreter_aarch64.hpp). +// +// local variables follow incoming parameters immediately; i.e. +// the return address is moved to the end of the locals). +// +// [ monitor entry ] <--- esp +// ... +// [ monitor entry ] +// [ expr. 
stack bottom ] +// [ saved rbcp ] +// [ current rlocals ] +// [ Method* ] +// [ saved rfp ] <--- rfp +// [ return address ] +// [ local variable m ] +// ... +// [ local variable 1 ] +// [ parameter n ] +// ... +// [ parameter 1 ] <--- rlocals + +address AbstractInterpreterGenerator::generate_method_entry( + AbstractInterpreter::MethodKind kind) { + // determine code generation flags + bool synchronized = false; + address entry_point = NULL; + + switch (kind) { + case Interpreter::zerolocals : break; + case Interpreter::zerolocals_synchronized: synchronized = true; break; + case Interpreter::native : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break; + case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true); break; + case Interpreter::empty : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry(); break; + case Interpreter::accessor : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry(); break; + case Interpreter::abstract : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry(); break; + + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break; + case Interpreter::java_lang_ref_reference_get + : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + case Interpreter::java_util_zip_CRC32_update + : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry(); break; + case Interpreter::java_util_zip_CRC32_updateBytes + : // fall thru + case Interpreter::java_util_zip_CRC32_updateByteBuffer + : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; + default : ShouldNotReachHere(); break; + } + + if (entry_point) { + return entry_point; + } + + return ((InterpreterGenerator*) this)-> + generate_normal_entry(synchronized); +} + + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; + default: + return true; + } +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved rfp thru expr stack + // bottom). 
be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = frame::entry_frame_after_call_words; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params) + + monitors * frame::interpreter_frame_monitor_size() + + // On the top frame, at all times SP <= ESP, and SP is + // 16-aligned. We ensure this by adjusting SP on method + // entry and re-entry to allow room for the maximum size of + // the expression stack. When we call another method we bump + // SP so that no stack space is wasted. So, only on the top + // frame do we need to allow max_stack words. + (is_top_frame ? max_stack : temps + extra_args); + + // On AArch64 we always keep the stack pointer 16-aligned, so we + // must round up here. + size = round_to(size, 2); + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. 
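Restated as straight-line arithmetic, size_activation() above computes the following word count (frame constants passed in as inputs; the rounding keeps the frame an even number of words so SP stays 16-byte aligned):

    static int size_activation_sketch(int max_stack, int temps, int extra_args,
                                      int monitors, int callee_params,
                                      int callee_locals, bool is_top_frame,
                                      int overhead, int monitor_words) {
      int size = overhead
               + (callee_locals - callee_params)        // only the extra locals
               + monitors * monitor_words
               + (is_top_frame ? max_stack              // top frame keeps max expr. stack
                               : temps + extra_args);   // inner frames: live temps only
      return (size + 1) & ~1;                           // round_to(size, 2)
    }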
+ // It is also guaranteed to be walkable even though it is in a + // skeletal state + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and + // interpreter_frame_sender_sp interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and + // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* esp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(esp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); +} + + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // r0: exception + // r3: return address/pc that threw exception + __ restore_bcp(); // rbcp points to call/send + __ restore_locals(); + __ restore_constant_pool_cache(); + __ reinit_heapbase(); // restore rheapbase as heapbase. + __ get_dispatch(); + + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // If we came here via a NullPointerException on the receiver of a + // method, rmethod may be corrupt. + __ get_method(rmethod); + // expression stack is undefined here + // r0: exception + // rbcp: exception bcp + __ verify_oop(r0); + __ mov(c_rarg1, r0); + + // expression stack must be empty before entering the VM in case of + // an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ call_VM(r3, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::exception_handler_for_exception), + c_rarg1); + + // Calculate stack limit + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + + (EnableInvokeDynamic ? 
2 : 0) + 2); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); + __ andr(sp, rscratch1, -16); + + // r0: exception handler entry point + // r3: preserved exception oop + // rbcp: bcp for exception handler + __ push_ptr(r3); // push exception which is now the only value on the stack + __ br(r0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. + __ ldrw(r3, Address(rthread, JavaThread::popframe_condition_offset())); + __ orr(r3, r3, JavaThread::popframe_processing_bit); + __ strw(r3, Address(rthread, JavaThread::popframe_condition_offset())); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ldr(c_rarg1, Address(rfp, frame::return_addr_offset * wordSize)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + InterpreterRuntime::interpreter_contains), c_rarg1); + __ cbnz(r0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to + // deoptimized caller + __ get_method(r0); + __ ldr(r0, Address(r0, Method::const_offset())); + __ load_unsigned_short(r0, Address(r0, in_bytes(ConstMethod:: + size_of_parameters_offset()))); + __ lsl(r0, r0, Interpreter::logStackElementSize); + __ restore_locals(); // XXX do we need this? 
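The next few instructions compute the incoming-argument region handed to Deoptimization::popframe_preserve_args: r0 is the argument size in bytes, and rlocals is walked down to the lowest-addressed argument slot. A pointer-arithmetic sketch (wordSize 8, locals growing downward from the locals base):

    #include <cstdint>
    #include <cstddef>

    static void popframe_args_region(intptr_t* locals_base, int param_words,
                                     intptr_t** start, size_t* size_in_bytes) {
      *size_in_bytes = (size_t)param_words * sizeof(intptr_t);  // r0 above
      *start         = locals_base - param_words + 1;           // rlocals - r0 + wordSize
    }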
+ __ sub(rlocals, rlocals, r0); + __ add(rlocals, rlocals, wordSize); + // Save these arguments + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + Deoptimization:: + popframe_preserve_args), + rthread, r0, rlocals); + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring + // these arguments + __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit); + __ strw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + + // Continue in deoptimization handler + __ ret(lr); + + __ bind(caller_not_deoptimized); + } + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Restore the last_sp and null it out + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + __ get_dispatch(); + + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + + // Clear the popframe condition flag + __ strw(zr, Address(rthread, JavaThread::popframe_condition_offset())); + assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + +#if INCLUDE_JVMTI + if (EnableInvokeDynamic) { + Label L_done; + + __ ldrb(rscratch1, Address(rbcp, 0)); + __ cmpw(rscratch1, Bytecodes::_invokestatic); + __ br(Assembler::NE, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ ldr(c_rarg0, Address(rlocals, 0)); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp); + + __ cbz(r0, L_done); + + __ str(r0, Address(esp, 0)); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Restore machine SP + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + + (EnableInvokeDynamic ? 
2 : 0)); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop_ptr(r0); + __ str(r0, Address(rthread, JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, false, true, false); + // restore exception + __ get_vm_result(r0, rthread); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects the + // following registers set up: + // + // r0: exception + // lr: return address/pc that threw exception + // esp: expression stack of caller + // rfp: fp of caller + __ stp(r0, lr, Address(__ pre(sp, -2 * wordSize))); // save exception & return address + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, lr); + __ mov(r1, r0); // save exception handler + __ ldp(r0, lr, Address(__ post(sp, 2 * wordSize))); // restore exception & return address + // We might be returning to a deopt handler that expects r3 to + // contain the exception pc + __ mov(r3, lr); + // Note that an "issuing PC" is actually the next PC after the call + __ br(r1); // jump to exception + // handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); + __ str(zr, cond_addr); + + __ remove_activation(state, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ ret(lr); + + return entry; +} // end of ForceEarlyReturn support + + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ b(L); + fep = __ pc(); __ push_f(); __ b(L); + dep = __ pc(); __ push_d(); __ b(L); + lep = __ pc(); __ push_l(); __ b(L); + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push(lr); + __ push(state); + 
__ push(RegSet::range(r0, r15), sp); + __ mov(c_rarg2, r0); // Pass itos + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), + c_rarg1, c_rarg2, c_rarg3); + __ pop(RegSet::range(r0, r15), sp); + __ pop(state); + __ pop(lr); + __ ret(lr); // return from result handler + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + Register rscratch3 = r0; + __ push(rscratch1); + __ push(rscratch2); + __ push(rscratch3); + __ mov(rscratch3, (address) &BytecodeCounter::_counter_value); + __ atomic_add(noreg, 1, rscratch3); + __ pop(rscratch3); + __ pop(rscratch2); + __ pop(rscratch1); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + + assert(Interpreter::trace_code(t->tos_in()) != NULL, + "entry must have been generated"); + __ bl(Interpreter::trace_code(t->tos_in())); + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ push(rscratch1); + __ mov(rscratch1, (address) &BytecodeCounter::_counter_value); + __ ldr(rscratch1, Address(rscratch1)); + __ mov(rscratch2, StopInterpreterAt); + __ cmpw(rscratch1, rscratch2); + __ br(Assembler::NE, L); + __ brk(0); + __ bind(L); + __ pop(rscratch1); +} + +#endif // !PRODUCT +#endif // ! CC_INTERP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/templateInterpreter_aarch64.hpp 2021-01-25 19:31:59.858719497 +0000 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP +#define CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP + + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. 
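+  // (The template interpreter is generated once, at VM start-up, into a
+  //  buffer of this fixed size, so the constant only needs to grow if the
+  //  guarantee mentioned above fires; the size reported by
+  //  -XX:+PrintInterpreter is a reasonable starting point.)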
+ // Max size with JVMTI + const static int InterpreterCodeSize = 200 * 1024; + +#endif // CPU_AARCH64_VM_TEMPLATEINTERPRETER_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/templateTable_aarch64.cpp 2021-01-25 19:32:00.294724080 +0000 @@ -0,0 +1,3923 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" + +#ifndef CC_INTERP + +#define __ _masm-> + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No amd64 specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(rlocals, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} + +static inline Address iaddress(Register r) { + return Address(rlocals, r, Address::lsl(3)); +} + +static inline Address laddress(Register r, Register scratch, + InterpreterMacroAssembler* _masm) { + __ lea(scratch, Address(rlocals, r, Address::lsl(3))); + return Address(scratch, Interpreter::local_offset_in_bytes(1)); +} + +static inline Address faddress(Register r) { + return iaddress(r); +} + +static inline Address daddress(Register r, Register scratch, + InterpreterMacroAssembler* _masm) { + return laddress(r, scratch, _masm); +} + +static inline Address aaddress(Register r) { + return iaddress(r); +} + +static inline Address at_rsp() { + return Address(esp, 0); +} + +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. 
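+// (Each expression stack slot and each local slot is a full 8-byte word on
+//  this port: expr_offset_in_bytes(i) simply steps i slots further into the
+//  stack from the element at esp, while locals are reached at negative
+//  offsets from rlocals -- which is why locals_index() negates the index it
+//  reads from the bytecode stream before iaddress(Register) scales it by 8,
+//  and why laddress(n) is just iaddress(n + 1) for the two-slot longs and
+//  doubles.)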
+static inline Address at_tos () {
+  return Address(esp, Interpreter::expr_offset_in_bytes(0));
+}
+
+static inline Address at_tos_p1() {
+  return Address(esp, Interpreter::expr_offset_in_bytes(1));
+}
+
+static inline Address at_tos_p2() {
+  return Address(esp, Interpreter::expr_offset_in_bytes(2));
+}
+
+static inline Address at_tos_p3() {
+  return Address(esp, Interpreter::expr_offset_in_bytes(3));
+}
+
+static inline Address at_tos_p4() {
+  return Address(esp, Interpreter::expr_offset_in_bytes(4));
+}
+
+static inline Address at_tos_p5() {
+  return Address(esp, Interpreter::expr_offset_in_bytes(5));
+}
+
+// Condition conversion
+static Assembler::Condition j_not(TemplateTable::Condition cc) {
+  switch (cc) {
+  case TemplateTable::equal        : return Assembler::NE;
+  case TemplateTable::not_equal    : return Assembler::EQ;
+  case TemplateTable::less         : return Assembler::GE;
+  case TemplateTable::less_equal   : return Assembler::GT;
+  case TemplateTable::greater      : return Assembler::LE;
+  case TemplateTable::greater_equal: return Assembler::LT;
+  }
+  ShouldNotReachHere();
+  return Assembler::EQ;
+}
+
+
+// Miscellaneous helper routines
+// Store an oop (or NULL) at the Address described by obj.
+// If val == noreg this means store a NULL
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == r0, "parameter is just for looks");
+  switch (barrier) {
+#if INCLUDE_ALL_GCS
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        if (obj.index() == noreg && obj.offset() == 0) {
+          if (obj.base() != r3) {
+            __ mov(r3, obj.base());
+          }
+        } else {
+          __ lea(r3, obj);
+        }
+        __ g1_write_barrier_pre(r3 /* obj */,
+                                r1 /* pre_val */,
+                                rthread /* thread */,
+                                r10  /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
+        if (val == noreg) {
+          __ store_heap_oop_null(Address(r3, 0));
+        } else {
+          // G1 barrier needs uncompressed oop for region cross check.
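+          // (The post barrier compares the heap region of the store address
+          //  with that of the new value, so it needs the raw 64-bit oop; a
+          //  copy is therefore kept in rscratch2 before store_heap_oop()
+          //  narrows val for the compressed store.)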
+ Register new_val = val; + if (UseCompressedOops) { + new_val = rscratch2; + __ mov(new_val, val); + } + __ store_heap_oop(Address(r3, 0), val); + __ g1_write_barrier_post(r3 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + r10 /* tmp */, + r1 /* tmp2 */); + } + + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.offset() == 0)) { + __ store_check(obj.base()); + } else { + __ lea(r3, obj); + __ store_check(r3); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(rbcp, offset); +} + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register temp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) +{ + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); + __ movw(bc_reg, bc); + __ cmpw(temp_reg, (unsigned) 0); + __ br(Assembler::EQ, L_patch_done); // don't patch + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. 
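+    // In the normal case the requested bytecode is simply materialized in
+    // bc_reg below; the breakpoint check and the ASSERT block that follow
+    // make sure the strb at the end only ever overwrites the original
+    // bytecode (or an already-patched fast version of it).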
+ if (load_bc_into_bc_reg) { + __ movw(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ cmpw(temp_reg, Bytecodes::_breakpoint); + __ br(Assembler::NE, L_fast_patch); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), rmethod, rbcp, bc_reg); + __ b(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ cmpw(temp_reg, (int) Bytecodes::java_code(bc)); + __ br(Assembler::EQ, L_okay); + __ cmpw(temp_reg, bc_reg); + __ br(Assembler::EQ, L_okay); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ strb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() +{ + transition(vtos, atos); + __ mov(r0, 0); +} + +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); + __ mov(r0, value); +} + +void TemplateTable::lconst(int value) +{ + __ mov(r0, value); +} + +void TemplateTable::fconst(int value) +{ + transition(vtos, ftos); + switch (value) { + case 0: + __ fmovs(v0, zr); + break; + case 1: + __ fmovs(v0, 1.0); + break; + case 2: + __ fmovs(v0, 2.0); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void TemplateTable::dconst(int value) +{ + transition(vtos, dtos); + switch (value) { + case 0: + __ fmovd(v0, zr); + break; + case 1: + __ fmovd(v0, 1.0); + break; + case 2: + __ fmovd(v0, 2.0); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void TemplateTable::bipush() +{ + transition(vtos, itos); + __ load_signed_byte32(r0, at_bcp(1)); +} + +void TemplateTable::sipush() +{ + transition(vtos, itos); + __ load_unsigned_short(r0, at_bcp(1)); + __ revw(r0, r0); + __ asrw(r0, r0, 16); +} + +void TemplateTable::ldc(bool wide) +{ + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(r1, 1); + } else { + __ load_unsigned_byte(r1, at_bcp(1)); + } + __ get_cpool_and_tags(r2, r0); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ add(r3, r1, tags_offset); + __ lea(r3, Address(r0, r3)); + __ ldarb(r3, r3); + + // unresolved class - get the resolved class + __ cmp(r3, JVM_CONSTANT_UnresolvedClass); + __ br(Assembler::EQ, call_ldc); + + // unresolved class in error state - call into runtime to throw the error + // from the first resolution attempt + __ cmp(r3, JVM_CONSTANT_UnresolvedClassInError); + __ br(Assembler::EQ, call_ldc); + + // resolved class - need to call vm to get java mirror of the class + __ cmp(r3, JVM_CONSTANT_Class); + __ br(Assembler::NE, notClass); + + __ bind(call_ldc); + __ mov(c_rarg1, wide); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); + __ push_ptr(r0); + __ verify_oop(r0); + __ b(Done); + + __ bind(notClass); + __ cmp(r3, JVM_CONSTANT_Float); + __ br(Assembler::NE, notFloat); + // ftos + __ adds(r1, r2, r1, Assembler::LSL, 3); + __ ldrs(v0, Address(r1, base_offset)); + __ push_f(); + __ b(Done); + + __ bind(notFloat); +#ifdef 
ASSERT + { + Label L; + __ cmp(r3, JVM_CONSTANT_Integer); + __ br(Assembler::EQ, L); + // String and Object are rewritten to fast_aldc + __ stop("unexpected tag type in ldc"); + __ bind(L); + } +#endif + // itos JVM_CONSTANT_Integer only + __ adds(r1, r2, r1, Assembler::LSL, 3); + __ ldrw(r0, Address(r1, base_offset)); + __ push_i(r0); + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) +{ + transition(vtos, atos); + + Register result = r0; + Register tmp = r1; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ cbnz(result, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + // first time invocation - must resolve first + __ mov(tmp, (int)bytecode()); + __ call_VM(result, entry, tmp); + + __ bind(resolved); + + if (VerifyOops) { + __ verify_oop(result); + } +} + +void TemplateTable::ldc2_w() +{ + transition(vtos, vtos); + Label Long, Done; + __ get_unsigned_2_byte_index_at_bcp(r0, 1); + + __ get_cpool_and_tags(r1, r2); + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ lea(r2, Address(r2, r0, Address::lsl(0))); + __ load_unsigned_byte(r2, Address(r2, tags_offset)); + __ cmpw(r2, (int)JVM_CONSTANT_Double); + __ br(Assembler::NE, Long); + // dtos + __ lea (r2, Address(r1, r0, Address::lsl(3))); + __ ldrd(v0, Address(r2, base_offset)); + __ push_d(); + __ b(Done); + + __ bind(Long); + // ltos + __ lea(r0, Address(r1, r0, Address::lsl(3))); + __ ldr(r0, Address(r0, base_offset)); + __ push_l(); + + __ bind(Done); +} + +void TemplateTable::locals_index(Register reg, int offset) +{ + __ ldrb(reg, at_bcp(offset)); + __ neg(reg, reg); +} + +void TemplateTable::iload() +{ + transition(vtos, itos); + if (RewriteFrequentPairs) { + Label rewrite, done; + Register bc = r4; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. 
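+    // Rewrite decision, keyed on the bytecode that follows this iload:
+    //   next == _iload      -> leave it alone (only the last iload of a
+    //                          run gets rewritten)
+    //   next == _fast_iload -> rewrite this one to _fast_iload2
+    //   next == _caload     -> rewrite this one to _fast_icaload
+    //   anything else       -> rewrite this one to _fast_iload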
+ __ cmpw(r1, Bytecodes::_iload); + __ br(Assembler::EQ, done); + + // if _fast_iload rewrite to _fast_iload2 + __ cmpw(r1, Bytecodes::_fast_iload); + __ movw(bc, Bytecodes::_fast_iload2); + __ br(Assembler::EQ, rewrite); + + // if _caload rewrite to _fast_icaload + __ cmpw(r1, Bytecodes::_caload); + __ movw(bc, Bytecodes::_fast_icaload); + __ br(Assembler::EQ, rewrite); + + // else rewrite to _fast_iload + __ movw(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, r1, false); + __ bind(done); + + } + + // do iload, get the local value into tos + locals_index(r1); + __ ldr(r0, iaddress(r1)); + +} + +void TemplateTable::fast_iload2() +{ + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); + __ push(itos); + locals_index(r1, 3); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::fast_iload() +{ + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::lload() +{ + transition(vtos, ltos); + __ ldrb(r1, at_bcp(1)); + __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord); + __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::fload() +{ + transition(vtos, ftos); + locals_index(r1); + // n.b. we use ldrd here because this is a 64 bit slot + // this is comparable to the iload case + __ ldrd(v0, faddress(r1)); +} + +void TemplateTable::dload() +{ + transition(vtos, dtos); + __ ldrb(r1, at_bcp(1)); + __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord); + __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::aload() +{ + transition(vtos, atos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ ldrh(reg, at_bcp(2)); + __ rev16w(reg, reg); + __ neg(reg, reg); +} + +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::wide_lload() +{ + transition(vtos, ltos); + __ ldrh(r1, at_bcp(2)); + __ rev16w(r1, r1); + __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord); + __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::wide_fload() +{ + transition(vtos, ftos); + locals_index_wide(r1); + // n.b. we use ldrd here because this is a 64 bit slot + // this is comparable to the iload case + __ ldrd(v0, faddress(r1)); +} + +void TemplateTable::wide_dload() +{ + transition(vtos, dtos); + __ ldrh(r1, at_bcp(2)); + __ rev16w(r1, r1); + __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord); + __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1))); +} + +void TemplateTable::wide_aload() +{ + transition(vtos, atos); + locals_index_wide(r1); + __ ldr(r0, aaddress(r1)); +} + +void TemplateTable::index_check(Register array, Register index) +{ + // destroys r1, rscratch1 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + // sign extend index for use by indexed load + // __ movl2ptr(index, index); + // check index + Register length = rscratch1; + __ ldrw(length, Address(array, arrayOopDesc::length_offset_in_bytes())); + __ cmpw(index, length); + if (index != r1) { + // ??? 
convention: move aberrant index into r1 for exception message + assert(r1 != array, "different registers"); + __ mov(r1, index); + } + Label ok; + __ br(Assembler::LO, ok); + __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ br(rscratch1); + __ bind(ok); +} + +void TemplateTable::iaload() +{ + transition(itos, itos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(2))); + __ ldrw(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_INT))); +} + +void TemplateTable::laload() +{ + transition(itos, ltos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(3))); + __ ldr(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_LONG))); +} + +void TemplateTable::faload() +{ + transition(itos, ftos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(2))); + __ ldrs(v0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_FLOAT))); +} + +void TemplateTable::daload() +{ + transition(itos, dtos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(3))); + __ ldrd(v0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); +} + +void TemplateTable::aaload() +{ + transition(itos, atos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + int s = (UseCompressedOops ? 2 : 3); + __ lea(r1, Address(r0, r1, Address::uxtw(s))); + __ load_heap_oop(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void TemplateTable::baload() +{ + transition(itos, itos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(0))); + __ load_signed_byte(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_BYTE))); +} + +void TemplateTable::caload() +{ + transition(itos, itos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(1))); + __ load_unsigned_short(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_CHAR))); +} + +// iload followed by caload frequent pair +void TemplateTable::fast_icaload() +{ + transition(vtos, itos); + // load index out of locals + locals_index(r2); + __ ldr(r1, iaddress(r2)); + + __ pop_ptr(r0); + + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(1))); + __ load_unsigned_short(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_CHAR))); +} + +void TemplateTable::saload() +{ + transition(itos, itos); + __ mov(r1, r0); + __ pop_ptr(r0); + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(1))); + __ load_signed_short(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_SHORT))); +} + +void TemplateTable::iload(int n) +{ + transition(vtos, itos); + __ ldr(r0, iaddress(n)); +} + +void TemplateTable::lload(int n) +{ + transition(vtos, ltos); + __ ldr(r0, laddress(n)); +} + +void TemplateTable::fload(int n) 
+{
+  transition(vtos, ftos);
+  __ ldrs(v0, faddress(n));
+}
+
+void TemplateTable::dload(int n)
+{
+  transition(vtos, dtos);
+  __ ldrd(v0, daddress(n));
+}
+
+void TemplateTable::aload(int n)
+{
+  transition(vtos, atos);
+  __ ldr(r0, iaddress(n));
+}
+
+void TemplateTable::aload_0()
+{
+  // According to bytecode histograms, the pairs:
+  //
+  // _aload_0, _fast_igetfield
+  // _aload_0, _fast_agetfield
+  // _aload_0, _fast_fgetfield
+  //
+  // occur frequently. If RewriteFrequentPairs is set, the (slow)
+  // _aload_0 bytecode checks if the next bytecode is either
+  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+  // rewrites the current bytecode into a pair bytecode; otherwise it
+  // rewrites the current bytecode into _fast_aload_0 that doesn't do
+  // the pair check anymore.
+  //
+  // Note: If the next bytecode is _getfield, the rewrite must be
+  // delayed, otherwise we may miss an opportunity for a pair.
+  //
+  // Also rewrite frequent pairs
+  //   aload_0, aload_1
+  //   aload_0, iload_1
+  // These bytecodes with a small amount of code are most profitable
+  // to rewrite
+  if (RewriteFrequentPairs) {
+    Label rewrite, done;
+    const Register bc = r4;
+
+    // get next bytecode
+    __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+    // do actual aload_0
+    aload(0);
+
+    // if _getfield then wait with rewrite
+    __ cmpw(r1, Bytecodes::Bytecodes::_getfield);
+    __ br(Assembler::EQ, done);
+
+    // if _igetfield then rewrite to _fast_iaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_igetfield);
+    __ movw(bc, Bytecodes::_fast_iaccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // if _agetfield then rewrite to _fast_aaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_agetfield);
+    __ movw(bc, Bytecodes::_fast_aaccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // if _fgetfield then rewrite to _fast_faccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ cmpw(r1, Bytecodes::_fast_fgetfield);
+    __ movw(bc, Bytecodes::_fast_faccess_0);
+    __ br(Assembler::EQ, rewrite);
+
+    // else rewrite to _fast_aload_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ movw(bc, Bytecodes::Bytecodes::_fast_aload_0);
+
+    // rewrite
+    // bc: new bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_aload_0, bc, r1, false);
+
+    __ bind(done);
+  } else {
+    aload(0);
+  }
+}
+
+void TemplateTable::istore()
+{
+  transition(itos, vtos);
+  locals_index(r1);
+  // FIXME: We're being very pernickety here storing a jint in a
+  // local with strw, which costs an extra instruction over what we'd
+  // be able to do with a simple str. We should just store the whole
+  // word.
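+  // (iaddress(r1) is a register-offset address scaled by 8, and an AArch64
+  //  register-offset store only allows a shift equal to its own access
+  //  size -- 2 for strw -- so the address is formed with lea first and the
+  //  32-bit store done through rscratch1.)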
+ __ lea(rscratch1, iaddress(r1)); + __ strw(r0, Address(rscratch1)); +} + +void TemplateTable::lstore() +{ + transition(ltos, vtos); + locals_index(r1); + __ str(r0, laddress(r1, rscratch1, _masm)); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(r1); + __ lea(rscratch1, iaddress(r1)); + __ strs(v0, Address(rscratch1)); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(r1); + __ strd(v0, daddress(r1, rscratch1, _masm)); +} + +void TemplateTable::astore() +{ + transition(vtos, vtos); + __ pop_ptr(r0); + locals_index(r1); + __ str(r0, aaddress(r1)); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(); + locals_index_wide(r1); + __ lea(rscratch1, iaddress(r1)); + __ strw(r0, Address(rscratch1)); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(); + locals_index_wide(r1); + __ str(r0, laddress(r1, rscratch1, _masm)); +} + +void TemplateTable::wide_fstore() { + transition(vtos, vtos); + __ pop_f(); + locals_index_wide(r1); + __ lea(rscratch1, faddress(r1)); + __ strs(v0, rscratch1); +} + +void TemplateTable::wide_dstore() { + transition(vtos, vtos); + __ pop_d(); + locals_index_wide(r1); + __ strd(v0, daddress(r1, rscratch1, _masm)); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(r0); + locals_index_wide(r1); + __ str(r0, aaddress(r1)); +} + +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // r0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + __ lea(rscratch1, Address(r3, r1, Address::uxtw(2))); + __ strw(r0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_INT))); +} + +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // r0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + __ lea(rscratch1, Address(r3, r1, Address::uxtw(3))); + __ str(r0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_LONG))); +} + +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // v0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + __ lea(rscratch1, Address(r3, r1, Address::uxtw(2))); + __ strs(v0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_FLOAT))); +} + +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // v0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + __ lea(rscratch1, Address(r3, r1, Address::uxtw(3))); + __ strd(v0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); +} + +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ldr(r0, at_tos()); // value + __ ldr(r2, at_tos_p1()); // index + __ ldr(r3, at_tos_p2()); // array + + Address element_address(r4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + index_check(r3, r2); // kills r1 + __ lea(r4, Address(r3, r2, Address::uxtw(UseCompressedOops? 2 : 3))); + + // do array store check - check for NULL value first + __ cbz(r0, is_null); + + // Move subklass into r1 + __ load_klass(r1, r0); + // Move superklass into r0 + __ load_klass(r0, r3); + __ ldr(r0, Address(r0, + ObjArrayKlass::element_klass_offset())); + // Compress array + index*oopSize + 12 into a single register. Frees r2. + + // Generate subtype check. Blows r2, r5 + // Superklass in r0. Subklass in r1. 
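+  // gen_subtype_check() branches to ok_is_subtype when the value's klass
+  // (r1) is a subtype of the array's element klass (r0) and falls through
+  // otherwise, in which case we jump to the shared ArrayStoreException
+  // entry with the offending value still at TOS.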
+ __ gen_subtype_check(r1, ok_is_subtype); + + // Come here on failure + // object is at TOS + __ b(Interpreter::_throw_ArrayStoreException_entry); + + // Come here on success + __ bind(ok_is_subtype); + + // Get the value we will store + __ ldr(r0, at_tos()); + // Now store using the appropriate barrier + do_oop_store(_masm, element_address, r0, _bs->kind(), true); + __ b(done); + + // Have a NULL in r0, r3=array, r2=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(r2); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); + + // Pop stack arguments + __ bind(done); + __ add(esp, esp, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() +{ + transition(itos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // r0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. + __ load_klass(r2, r3); + __ ldrw(r2, Address(r2, Klass::layout_helper_offset())); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ andw(rscratch1, r2, diffbit); + Label L_skip; + __ cbzw(rscratch1, L_skip); + __ andw(r0, r0, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + __ bind(L_skip); + + __ lea(rscratch1, Address(r3, r1, Address::uxtw(0))); + __ strb(r0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_BYTE))); +} + +void TemplateTable::castore() +{ + transition(itos, vtos); + __ pop_i(r1); + __ pop_ptr(r3); + // r0: value + // r1: index + // r3: array + index_check(r3, r1); // prefer index in r1 + __ lea(rscratch1, Address(r3, r1, Address::uxtw(1))); + __ strh(r0, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_CHAR))); +} + +void TemplateTable::sastore() +{ + castore(); +} + +void TemplateTable::istore(int n) +{ + transition(itos, vtos); + __ str(r0, iaddress(n)); +} + +void TemplateTable::lstore(int n) +{ + transition(ltos, vtos); + __ str(r0, laddress(n)); +} + +void TemplateTable::fstore(int n) +{ + transition(ftos, vtos); + __ strs(v0, faddress(n)); +} + +void TemplateTable::dstore(int n) +{ + transition(dtos, vtos); + __ strd(v0, daddress(n)); +} + +void TemplateTable::astore(int n) +{ + transition(vtos, vtos); + __ pop_ptr(r0); + __ str(r0, iaddress(n)); +} + +void TemplateTable::pop() +{ + transition(vtos, vtos); + __ add(esp, esp, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() +{ + transition(vtos, vtos); + __ add(esp, esp, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() +{ + transition(vtos, vtos); + __ ldr(r0, Address(esp, 0)); + __ push(r0); + // stack: ..., a, a +} + +void TemplateTable::dup_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r0, at_tos()); // load b + __ ldr(r2, at_tos_p1()); // load a + __ str(r0, at_tos_p1()); // store b + __ str(r2, at_tos()); // store a + __ push(r0); // push b + // stack: ..., b, a, b +} + +void TemplateTable::dup_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ldr(r0, at_tos()); // load c + __ ldr(r2, at_tos_p2()); // load a + __ str(r0, at_tos_p2()); // store c in a + __ push(r0); // push c + // stack: ..., c, b, c, c + __ ldr(r0, at_tos_p2()); // load b + __ str(r2, at_tos_p2()); // store a in b + // stack: ..., c, a, c, c + __ str(r0, at_tos_p1()); // store b in c + // stack: ..., c, a, b, c +} + +void TemplateTable::dup2() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r0, at_tos_p1()); // load a + __ push(r0); // push a + __ ldr(r0, 
at_tos_p1()); // load b + __ push(r0); // push b + // stack: ..., a, b, a, b +} + +void TemplateTable::dup2_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ldr(r2, at_tos()); // load c + __ ldr(r0, at_tos_p1()); // load b + __ push(r0); // push b + __ push(r2); // push c + // stack: ..., a, b, c, b, c + __ str(r2, at_tos_p3()); // store c in b + // stack: ..., a, c, c, b, c + __ ldr(r2, at_tos_p4()); // load a + __ str(r2, at_tos_p2()); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ str(r0, at_tos_p4()); // store b in a + // stack: ..., b, c, a, b, c +} + +void TemplateTable::dup2_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c, d + __ ldr(r2, at_tos()); // load d + __ ldr(r0, at_tos_p1()); // load c + __ push(r0) ; // push c + __ push(r2); // push d + // stack: ..., a, b, c, d, c, d + __ ldr(r0, at_tos_p4()); // load b + __ str(r0, at_tos_p2()); // store b in d + __ str(r2, at_tos_p4()); // store d in b + // stack: ..., a, d, c, b, c, d + __ ldr(r2, at_tos_p5()); // load a + __ ldr(r0, at_tos_p3()); // load c + __ str(r2, at_tos_p3()); // store a in c + __ str(r0, at_tos_p5()); // store c in a + // stack: ..., c, d, a, b, c, d +} + +void TemplateTable::swap() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r2, at_tos_p1()); // load a + __ ldr(r0, at_tos()); // load b + __ str(r2, at_tos()); // store a in b + __ str(r0, at_tos_p1()); // store b in a + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) +{ + transition(itos, itos); + // r0 <== r1 op r0 + __ pop_i(r1); + switch (op) { + case add : __ addw(r0, r1, r0); break; + case sub : __ subw(r0, r1, r0); break; + case mul : __ mulw(r0, r1, r0); break; + case _and : __ andw(r0, r1, r0); break; + case _or : __ orrw(r0, r1, r0); break; + case _xor : __ eorw(r0, r1, r0); break; + case shl : __ lslvw(r0, r1, r0); break; + case shr : __ asrvw(r0, r1, r0); break; + case ushr : __ lsrvw(r0, r1, r0);break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::lop2(Operation op) +{ + transition(ltos, ltos); + // r0 <== r1 op r0 + __ pop_l(r1); + switch (op) { + case add : __ add(r0, r1, r0); break; + case sub : __ sub(r0, r1, r0); break; + case mul : __ mul(r0, r1, r0); break; + case _and : __ andr(r0, r1, r0); break; + case _or : __ orr(r0, r1, r0); break; + case _xor : __ eor(r0, r1, r0); break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::idiv() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ cbnzw(r0, no_div0); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ br(rscratch1); + __ bind(no_div0); + __ pop_i(r1); + // r0 <== r1 idiv r0 + __ corrected_idivl(r0, r1, r0, /* want_remainder */ false); +} + +void TemplateTable::irem() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ cbnzw(r0, no_div0); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ br(rscratch1); + __ bind(no_div0); + __ pop_i(r1); + // r0 <== r1 irem r0 + __ corrected_idivl(r0, r1, r0, /* want_remainder */ true); +} + +void TemplateTable::lmul() +{ + transition(ltos, ltos); + __ pop_l(r1); + __ mul(r0, r0, r1); +} + +void TemplateTable::ldiv() +{ + transition(ltos, ltos); + // explicitly check for div0 + Label no_div0; + __ cbnz(r0, no_div0); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ br(rscratch1); + __ bind(no_div0); + __ pop_l(r1); + // r0 <== r1 ldiv r0 + __ corrected_idivq(r0, r1, r0, /* want_remainder */ false); +} + +void TemplateTable::lrem() +{ 
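+  // As in idiv/irem above: AArch64 integer division does not trap on a
+  // zero divisor (sdiv simply produces 0), so the divisor is tested
+  // explicitly and we branch to the shared ArithmeticException entry
+  // before dividing.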
+ transition(ltos, ltos); + // explicitly check for div0 + Label no_div0; + __ cbnz(r0, no_div0); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ br(rscratch1); + __ bind(no_div0); + __ pop_l(r1); + // r0 <== r1 lrem r0 + __ corrected_idivq(r0, r1, r0, /* want_remainder */ true); +} + +void TemplateTable::lshl() +{ + transition(itos, ltos); + // shift count is in r0 + __ pop_l(r1); + __ lslv(r0, r1, r0); +} + +void TemplateTable::lshr() +{ + transition(itos, ltos); + // shift count is in r0 + __ pop_l(r1); + __ asrv(r0, r1, r0); +} + +void TemplateTable::lushr() +{ + transition(itos, ltos); + // shift count is in r0 + __ pop_l(r1); + __ lsrv(r0, r1, r0); +} + +void TemplateTable::fop2(Operation op) +{ + transition(ftos, ftos); + switch (op) { + case add: + // n.b. use ldrd because this is a 64 bit slot + __ pop_f(v1); + __ fadds(v0, v1, v0); + break; + case sub: + __ pop_f(v1); + __ fsubs(v0, v1, v0); + break; + case mul: + __ pop_f(v1); + __ fmuls(v0, v1, v0); + break; + case div: + __ pop_f(v1); + __ fdivs(v0, v1, v0); + break; + case rem: + __ fmovs(v1, v0); + __ pop_f(v0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void TemplateTable::dop2(Operation op) +{ + transition(dtos, dtos); + switch (op) { + case add: + // n.b. use ldrd because this is a 64 bit slot + __ pop_d(v1); + __ faddd(v0, v1, v0); + break; + case sub: + __ pop_d(v1); + __ fsubd(v0, v1, v0); + break; + case mul: + __ pop_d(v1); + __ fmuld(v0, v1, v0); + break; + case div: + __ pop_d(v1); + __ fdivd(v0, v1, v0); + break; + case rem: + __ fmovd(v1, v0); + __ pop_d(v0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void TemplateTable::ineg() +{ + transition(itos, itos); + __ negw(r0, r0); + +} + +void TemplateTable::lneg() +{ + transition(ltos, ltos); + __ neg(r0, r0); +} + +void TemplateTable::fneg() +{ + transition(ftos, ftos); + __ fnegs(v0, v0); +} + +void TemplateTable::dneg() +{ + transition(dtos, dtos); + __ fnegd(v0, v0); +} + +void TemplateTable::iinc() +{ + transition(vtos, vtos); + __ load_signed_byte(r1, at_bcp(2)); // get constant + locals_index(r2); + __ ldr(r0, iaddress(r2)); + __ addw(r0, r0, r1); + __ str(r0, iaddress(r2)); +} + +void TemplateTable::wide_iinc() +{ + transition(vtos, vtos); + // __ mov(r1, zr); + __ ldrw(r1, at_bcp(2)); // get constant and index + __ rev16(r1, r1); + __ ubfx(r2, r1, 0, 16); + __ neg(r2, r2); + __ sbfx(r1, r1, 16, 16); + __ ldr(r0, iaddress(r2)); + __ addw(r0, r0, r1); + __ str(r0, iaddress(r2)); +} + +void TemplateTable::convert() +{ + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // 
fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + // static const int64_t is_nan = 0x8000000000000000L; + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ sxtw(r0, r0); + break; + case Bytecodes::_i2f: + __ scvtfws(v0, r0); + break; + case Bytecodes::_i2d: + __ scvtfwd(v0, r0); + break; + case Bytecodes::_i2b: + __ sxtbw(r0, r0); + break; + case Bytecodes::_i2c: + __ uxthw(r0, r0); + break; + case Bytecodes::_i2s: + __ sxthw(r0, r0); + break; + case Bytecodes::_l2i: + __ uxtw(r0, r0); + break; + case Bytecodes::_l2f: + __ scvtfs(v0, r0); + break; + case Bytecodes::_l2d: + __ scvtfd(v0, r0); + break; + case Bytecodes::_f2i: + { + Label L_Okay; + __ clear_fpsr(); + __ fcvtzsw(r0, v0); + __ get_fpsr(r1); + __ cbzw(r1, L_Okay); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i)); + __ bind(L_Okay); + } + break; + case Bytecodes::_f2l: + { + Label L_Okay; + __ clear_fpsr(); + __ fcvtzs(r0, v0); + __ get_fpsr(r1); + __ cbzw(r1, L_Okay); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l)); + __ bind(L_Okay); + } + break; + case Bytecodes::_f2d: + __ fcvts(v0, v0); + break; + case Bytecodes::_d2i: + { + Label L_Okay; + __ clear_fpsr(); + __ fcvtzdw(r0, v0); + __ get_fpsr(r1); + __ cbzw(r1, L_Okay); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); + __ bind(L_Okay); + } + break; + case Bytecodes::_d2l: + { + Label L_Okay; + __ clear_fpsr(); + __ fcvtzd(r0, v0); + __ get_fpsr(r1); + __ cbzw(r1, L_Okay); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); + __ bind(L_Okay); + } + break; + case Bytecodes::_d2f: + __ fcvtd(v0, v0); + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() +{ + transition(ltos, itos); + Label done; + __ pop_l(r1); + __ cmp(r1, r0); + __ mov(r0, (u_int64_t)-1L); + __ br(Assembler::LT, done); + // __ mov(r0, 1UL); + // __ csel(r0, r0, zr, Assembler::NE); + // and here is a faster way + __ csinc(r0, zr, zr, Assembler::EQ); + __ bind(done); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) +{ + Label done; + if (is_float) { + // XXX get rid of pop here, use ... reg, mem32 + __ pop_f(v1); + __ fcmps(v1, v0); + } else { + // XXX get rid of pop here, use ... reg, mem64 + __ pop_d(v1); + __ fcmpd(v1, v0); + } + if (unordered_result < 0) { + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. + __ mov(r0, (u_int64_t)-1L); + // for FP LT tests less than or unordered + __ br(Assembler::LT, done); + // install 0 for EQ otherwise 1 + __ csinc(r0, zr, zr, Assembler::EQ); + } else { + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. 
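+    // After fcmp the flags are: less 1000, equal 0110, greater 0010,
+    // unordered 0011 (NZCV).  LT (used in the unordered_result < 0 case
+    // above) is thus true for "less or unordered", while HI below is true
+    // for "greater or unordered" -- exactly the bias the two values of
+    // unordered_result call for.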
+ __ mov(r0, 1L); + // for FP HI tests greater than or unordered + __ br(Assembler::HI, done); + // install 0 for EQ otherwise ~0 + __ csinv(r0, zr, zr, Assembler::EQ); + + } + __ bind(done); +} + +void TemplateTable::branch(bool is_jsr, bool is_wide) +{ + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + __ profile_taken_branch(r0, r1); + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // load branch displacement + if (!is_wide) { + __ ldrh(r2, at_bcp(1)); + __ rev16(r2, r2); + // sign extend the 16 bit value in r2 + __ sbfm(r2, r2, 0, 15); + } else { + __ ldrw(r2, at_bcp(1)); + __ revw(r2, r2); + // sign extend the 32 bit value in r2 + __ sbfm(r2, r2, 0, 31); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occurring below. + + if (is_jsr) { + // Pre-load the next target bytecode into rscratch1 + __ load_unsigned_byte(rscratch1, Address(rbcp, r2)); + // compute return address as bci + __ ldr(rscratch2, Address(rmethod, Method::const_offset())); + __ add(rscratch2, rscratch2, + in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); + __ sub(r1, rbcp, rscratch2); + __ push_i(r1); + // Adjust the bcp by the 16-bit displacement in r2 + __ add(rbcp, rbcp, r2); + __ dispatch_only(vtos); + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp by the displacement in r2 + __ add(rbcp, rbcp, r2); + + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // r0: MDO + // w1: MDO bumped taken-count + // r2: target offset + __ cmp(r2, zr); + __ br(Assembler::GT, dispatch); // count only if backward branch + + // ECN: FIXME: This code smells + // check if MethodCounters exists + Label has_counters; + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + __ cbnz(rscratch1, has_counters); + __ push(r0); + __ push(r1); + __ push(r2); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), rmethod); + __ pop(r2); + __ pop(r1); + __ pop(r0); + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + __ cbz(rscratch1, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? 
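+        // When an MDO has been allocated the backedge counter in the MDO is
+        // bumped, otherwise the one in MethodCounters below;
+        // increment_mask_and_jump() branches to backedge_counter_overflow
+        // whenever the masked count reaches zero, which is what feeds the
+        // OSR machinery further down.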
+ __ ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset()))); + __ cbz(r1, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(r1, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + r0, rscratch2, false, Assembler::EQ, &backedge_counter_overflow); + __ b(dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(rscratch1, be_offset), increment, mask, + r0, rscratch2, false, Assembler::EQ, &backedge_counter_overflow); + } else { + // increment counter + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldrw(r0, Address(rscratch2, be_offset)); // load backedge counter + __ addw(rscratch1, r0, InvocationCounter::count_increment); // increment counter + __ strw(rscratch1, Address(rscratch2, be_offset)); // store counter + + __ ldrw(r0, Address(rscratch2, inv_offset)); // load invocation counter + __ andw(r0, r0, (unsigned)InvocationCounter::count_mask_value); // and the status bits + __ addw(r0, r0, rscratch1); // add both counters + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit)); + __ ldrw(rscratch1, rscratch1); + __ cmpw(r0, rscratch1); + __ br(Assembler::LT, dispatch); + + // if no method data exists, go to profile method + __ test_method_data_pointer(r0, profile_method); + + if (UseOnStackReplacement) { + // check for overflow against w1 which is the MDO taken count + __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit)); + __ ldrw(rscratch1, rscratch1); + __ cmpw(r1, rscratch1); + __ br(Assembler::LO, dispatch); // Intel == Assembler::below + + // When ProfileInterpreter is on, the backedge_count comes + // from the MethodData*, which value does not get reset on + // the call to frequency_counter_overflow(). To avoid + // excessive calls to the overflow routine while the method is + // being compiled, add a second test to make sure the overflow + // function is called only once every overflow_frequency. + const int overflow_frequency = 1024; + __ andsw(r1, r1, overflow_frequency - 1); + __ br(Assembler::EQ, backedge_counter_overflow); + + } + } else { + if (UseOnStackReplacement) { + // check for overflow against w0, which is the sum of the + // counters + __ lea(rscratch1, ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit)); + __ ldrw(rscratch1, rscratch1); + __ cmpw(r0, rscratch1); + __ br(Assembler::HS, backedge_counter_overflow); // Intel == Assembler::aboveEqual + } + } + } + } + __ bind(dispatch); + + // Pre-load the next target bytecode into rscratch1 + __ load_unsigned_byte(rscratch1, Address(rbcp, 0)); + + // continue with the bytecode @ target + // rscratch1: target bytecode + // rbcp: target bcp + __ dispatch_only(vtos); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. 
+ __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ load_unsigned_byte(r1, Address(rbcp, 0)); // restore target bytecode + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + } + + if (TieredCompilation || UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ neg(r2, r2); + __ add(r2, r2, rbcp); // branch bcp + // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + r2); + if (!UseOnStackReplacement) + __ b(dispatch); + } + + if (UseOnStackReplacement) { + __ load_unsigned_byte(r1, Address(rbcp, 0)); // restore target bytecode + + // r0: osr nmethod (osr ok) or NULL (osr not possible) + // w1: target bytecode + // r2: scratch + __ cbz(r0, dispatch); // test result -- no osr if null + // nmethod may have been invalidated (VM may block upon call_VM return) + __ ldrw(r2, Address(r0, nmethod::entry_bci_offset())); + // InvalidOSREntryBci == -2 which overflows cmpw as unsigned + // use cmnw against -InvalidOSREntryBci which does the same thing + __ cmn(r2, -InvalidOSREntryBci); + __ br(Assembler::EQ, dispatch); + + // We have the address of an on stack replacement routine in r0 + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. + + __ mov(r19, r0); // save the nmethod + + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // r0 is OSR buffer, move it to expected parameter location + __ mov(j_rarg0, r0); + + // remove activation + // get sender esp + __ ldr(esp, + Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + __ leave(); + // Ensure compiled code always sees stack at proper alignment + __ andr(sp, esp, -16); + + // and begin the OSR nmethod + __ ldr(rscratch1, Address(r19, nmethod::osr_entry_point_offset())); + __ br(rscratch1); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + if (cc == equal) + __ cbnzw(r0, not_taken); + else if (cc == not_equal) + __ cbzw(r0, not_taken); + else { + __ andsw(zr, r0, r0); + __ br(j_not(cc), not_taken); + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_icmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_i(r1); + __ cmpw(r1, r0, Assembler::LSL); + __ br(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_nullcmp(Condition cc) +{ + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + if (cc == equal) + __ cbnz(r0, not_taken); + else + __ cbz(r0, not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_acmp(Condition cc) +{ + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_ptr(r1); + __ cmp(r1, r0); + __ br(j_not(cc), not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::ret() { + transition(vtos, vtos); + // We might be moving to 
a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + locals_index(r1); + __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp + __ profile_ret(r1, r2); + __ ldr(rbcp, Address(rmethod, Method::const_offset())); + __ lea(rbcp, Address(rbcp, r1)); + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos); +} + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + locals_index_wide(r1); + __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp + __ profile_ret(r1, r2); + __ ldr(rbcp, Address(rmethod, Method::const_offset())); + __ lea(rbcp, Address(rbcp, r1)); + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos); +} + + +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + // align rbcp + __ lea(r1, at_bcp(BytesPerInt)); + __ andr(r1, r1, -BytesPerInt); + // load lo & hi + __ ldrw(r2, Address(r1, BytesPerInt)); + __ ldrw(r3, Address(r1, 2 * BytesPerInt)); + __ rev32(r2, r2); + __ rev32(r3, r3); + // check against lo & hi + __ cmpw(r0, r2); + __ br(Assembler::LT, default_case); + __ cmpw(r0, r3); + __ br(Assembler::GT, default_case); + // lookup dispatch offset + __ subw(r0, r0, r2); + __ lea(r3, Address(r1, r0, Address::uxtw(2))); + __ ldrw(r3, Address(r3, 3 * BytesPerInt)); + __ profile_switch_case(r0, r1, r2); + // continue execution + __ bind(continue_execution); + __ rev32(r3, r3); + __ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0))); + __ add(rbcp, rbcp, r3, ext::sxtw); + __ dispatch_only(vtos); + // handle default + __ bind(default_case); + __ profile_switch_default(r0); + __ ldrw(r3, Address(r1, 0)); + __ b(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + // bswap r0 so we can avoid bswapping the table entries + __ rev32(r0, r0); + // align rbcp + __ lea(r19, at_bcp(BytesPerInt)); // btw: should be able to get rid of + // this instruction (change offsets + // below) + __ andr(r19, r19, -BytesPerInt); + // set counter + __ ldrw(r1, Address(r19, BytesPerInt)); + __ rev32(r1, r1); + __ b(loop_entry); + // table search + __ bind(loop); + __ lea(rscratch1, Address(r19, r1, Address::lsl(3))); + __ ldrw(rscratch1, Address(rscratch1, 2 * BytesPerInt)); + __ cmpw(r0, rscratch1); + __ br(Assembler::EQ, found); + __ bind(loop_entry); + __ subs(r1, r1, 1); + __ br(Assembler::PL, loop); + // default case + __ profile_switch_default(r0); + __ ldrw(r3, Address(r19, 0)); + __ b(continue_execution); + // entry found -> get offset + __ bind(found); + __ lea(rscratch1, Address(r19, r1, Address::lsl(3))); + __ ldrw(r3, Address(rscratch1, 3 * BytesPerInt)); + __ profile_switch_case(r1, r0, r19); + // continue execution + __ bind(continue_execution); + __ rev32(r3, r3); + __ add(rbcp, rbcp, r3, ext::sxtw); + __ ldrb(rscratch1, Address(rbcp, 0)); + __ dispatch_only(vtos); +} + +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik 
des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // Register allocation + const Register key = r0; // already set (tosca) + const Register array = r1; + const Register i = r2; + const Register j = r3; + const Register h = rscratch1; + const Register temp = rscratch2; + + // Find array start + __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to + // get rid of this + // instruction (change + // offsets below) + __ andr(array, array, -BytesPerInt); + + // Initialize i & j + __ mov(i, 0); // i = 0; + __ ldrw(j, Address(array, -BytesPerInt)); // j = length(array); + + // Convert j into native byteordering + __ rev32(j, j); + + // And start + Label entry; + __ b(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ addw(h, i, j); // h = i + j; + __ lsrw(h, h, 1); // h = (i + j) >> 1; + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ ldr(temp, Address(array, h, Address::lsl(3))); + __ rev32(temp, temp); + __ cmpw(key, temp); + // j = h if (key < array[h].fast_match()) + __ csel(j, h, j, Assembler::LT); + // i = h if (key >= array[h].fast_match()) + __ csel(i, h, i, Assembler::GE); + // while (i+1 < j) + __ bind(entry); + __ addw(h, i, 1); // i+1 + __ cmpw(h, j); // i+1 < j + __ br(Assembler::LT, loop); + } + + // end of binary search, result index is i (must check again!) 
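// For reference, a minimal plain-C++ sketch of the search the code above
// implements over the match values (the function and array names are
// illustrative, not part of the patch); the two csel instructions play the
// role of the if/else, and the caller must still re-check match[i] == key:
static int binary_search_sketch(int key, const int* match, int n) {
  int i = 0;                  // invariant: match[i] <= key < match[j],
  int j = n;                  // treating the missing match[n] as +infinity
  while (i + 1 < j) {
    int h = (i + j) >> 1;     // i < h < j
    if (key < match[h]) {
      j = h;                  // csel(j, h, j, Assembler::LT)
    } else {
      i = h;                  // csel(i, h, i, Assembler::GE)
    }
  }
  return i;
}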
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ ldr(temp, Address(array, i, Address::lsl(3))); + __ rev32(temp, temp); + __ cmpw(key, temp); + __ br(Assembler::NE, default_case); + + // entry found -> j = offset + __ add(j, array, i, ext::uxtx, 3); + __ ldrw(j, Address(j, BytesPerInt)); + __ profile_switch_case(i, key, array); + __ rev32(j, j); + __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0))); + __ lea(rbcp, Address(rbcp, j, Address::sxtw(0))); + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ldrw(j, Address(array, -2 * BytesPerInt)); + __ rev32(j, j); + __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0))); + __ lea(rbcp, Address(rbcp, j, Address::sxtw(0))); + __ dispatch_only(vtos); +} + + +void TemplateTable::_return(TosState state) +{ + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + + __ ldr(c_rarg1, aaddress(0)); + __ load_klass(r3, c_rarg1); + __ ldrw(r3, Address(r3, Klass::access_flags_offset())); + __ tst(r3, JVM_ACC_HAS_FINALIZER); + Label skip_register_finalizer; + __ br(Assembler::EQ, skip_register_finalizer); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); + + __ bind(skip_register_finalizer); + } + + // Issue a StoreStore barrier after all stores but before return + // from any constructor for any class with a final field. We don't + // know if this is a finalizer, so we always do so. + if (_desc->bytecode() == Bytecodes::_return) + __ membar(MacroAssembler::StoreStore); + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(r0); + } + + __ remove_activation(state); + __ ret(lr); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. 
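// For concreteness (summarizing the code later in this file, not adding a
// new rule): getfield_or_static and putfield_or_static below realize this
// roughly as
//     volatile load   ->  membar(LoadLoad  | LoadStore)   after the access
//     volatile store  ->  membar(StoreStore | LoadStore)  before the access
//                         membar(StoreLoad | StoreStore)  after the access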
These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. + +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + const Register temp = r19; + assert_different_registers(Rcache, index, temp); + + Label resolved; + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + __ cmp(temp, (int) bytecode()); // have we resolved this bytecode? + __ br(Assembler::EQ, resolved); + + // resolve first time through + address entry; + switch (bytecode()) { + case Bytecodes::_getstatic: + case Bytecodes::_putstatic: + case Bytecodes::_getfield: + case Bytecodes::_putfield: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); + break; + case Bytecodes::_invokevirtual: + case Bytecodes::_invokespecial: + case Bytecodes::_invokestatic: + case Bytecodes::_invokeinterface: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); + break; + case Bytecodes::_invokehandle: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); + break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; + default: + fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); + break; + } + __ mov(temp, (int) bytecode()); + __ call_VM(noreg, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +// n.b unlike x86 cache already includes the index offset +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ ldr(off, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f2_offset()))); + // Flags + __ ldrw(flags, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + + // klass overwrite register + if (is_static) { + __ ldr(obj, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f1_offset()))); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ldr(obj, Address(obj, mirror_offset)); + } +} + +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = rscratch2; + const Register index = r4; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + // determine constant pool cache field offsets + assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + (is_invokevirtual + ? 
ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + __ ldr(method, Address(cache, method_offset)); + + if (itable_index != noreg) { + __ ldr(itable_index, Address(cache, index_offset)); + } + __ ldrw(flags, Address(cache, flags_offset)); +} + + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + assert_different_registers(cache, index, r0); + __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ ldrw(r0, Address(rscratch1)); + __ cbzw(r0, L1); + + __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); + __ lea(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); + + if (is_static) { + __ mov(c_rarg1, zr); // NULL object reference + } else { + __ ldr(c_rarg1, at_tos()); // get object pointer without popping it + __ verify_oop(c_rarg1); + } + // c_rarg1: object pointer or NULL + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) +{ + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. + __ verify_oop(r); +} + +void TemplateTable::getfield_or_static(int byte_no, bool is_static) +{ + const Register cache = r2; + const Register index = r3; + const Register obj = r4; + const Register off = r19; + const Register flags = r0; + const Register raw_flags = r6; + const Register bc = r4; // uses same reg as obj, so don't mix them + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); + + if (!is_static) { + // obj is on the stack + pop_and_check_object(obj); + } + + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LDR;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and we interpret the loads in another. + if (! 
UseBarriersForVolatile) { + Label notVolatile; + __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + const Address field(obj, off); + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + // x86 uses a shift and mask or wings it with a shift plus assert + // the mask is not needed. aarch64 just uses bitfield extract + __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, + ConstantPoolCacheEntry::tos_state_bits); + + assert(btos == 0, "change code, btos != 0"); + __ cbnz(flags, notByte); + + // btos + __ load_signed_byte(r0, field); + __ push(btos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1); + } + __ b(Done); + + __ bind(notByte); + __ cmp(flags, ztos); + __ br(Assembler::NE, notBool); + + // ztos (same code as btos) + __ ldrsb(r0, field); + __ push(ztos); + // Rewrite bytecode to be faster + if (!is_static) { + // use btos rewriting, no truncating to t/f bit is needed for getfield. + patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1); + } + __ b(Done); + + __ bind(notBool); + __ cmp(flags, atos); + __ br(Assembler::NE, notObj); + // atos + __ load_heap_oop(r0, field); + __ push(atos); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + + __ bind(notObj); + __ cmp(flags, itos); + __ br(Assembler::NE, notInt); + // itos + __ ldrw(r0, field); + __ push(itos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_igetfield, bc, r1); + } + __ b(Done); + + __ bind(notInt); + __ cmp(flags, ctos); + __ br(Assembler::NE, notChar); + // ctos + __ load_unsigned_short(r0, field); + __ push(ctos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cgetfield, bc, r1); + } + __ b(Done); + + __ bind(notChar); + __ cmp(flags, stos); + __ br(Assembler::NE, notShort); + // stos + __ load_signed_short(r0, field); + __ push(stos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sgetfield, bc, r1); + } + __ b(Done); + + __ bind(notShort); + __ cmp(flags, ltos); + __ br(Assembler::NE, notLong); + // ltos + __ ldr(r0, field); + __ push(ltos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, r1); + } + __ b(Done); + + __ bind(notLong); + __ cmp(flags, ftos); + __ br(Assembler::NE, notFloat); + // ftos + __ ldrs(v0, field); + __ push(ftos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fgetfield, bc, r1); + } + __ b(Done); + + __ bind(notFloat); +#ifdef ASSERT + __ cmp(flags, dtos); + __ br(Assembler::NE, notDouble); +#endif + // dtos + __ ldrd(v0, field); + __ push(dtos); + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, r1); + } +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + Label notVolatile; + __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); +} + + +void TemplateTable::getfield(int byte_no) +{ + getfield_or_static(byte_no, false); +} + +void TemplateTable::getstatic(int byte_no) +{ + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. 
+// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + assert_different_registers(cache, index, r0); + __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ ldrw(r0, Address(rscratch1)); + __ cbz(r0, L1); + + __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1); + + if (is_static) { + // Life is simple. Null out the object pointer. + __ mov(c_rarg1, zr); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. + __ ldrw(c_rarg3, Address(c_rarg2, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + __ lsr(c_rarg3, c_rarg3, + ConstantPoolCacheEntry::tos_state_shift); + ConstantPoolCacheEntry::verify_tos_state_shift(); + Label nope2, done, ok; + __ ldr(c_rarg1, at_tos_p1()); // initially assume a one word jvalue + __ cmpw(c_rarg3, ltos); + __ br(Assembler::EQ, ok); + __ cmpw(c_rarg3, dtos); + __ br(Assembler::NE, nope2); + __ bind(ok); + __ ldr(c_rarg1, at_tos_p2()); // ltos (two word jvalue) + __ bind(nope2); + } + // cache entry pointer + __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); + // object (tos) + __ mov(c_rarg3, esp); + // c_rarg1: object pointer set up above (NULL if static) + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::putfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = r2; + const Register index = r3; + const Register obj = r2; + const Register off = r19; + const Register flags = r0; + const Register bc = r4; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + __ mov(r5, flags); + + { + Label notVolatile; + __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + // field address + const Address field(obj, off); + + Label notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + // x86 uses a shift and mask or wings it with a shift plus assert + // the mask is not needed. 
aarch64 just uses bitfield extract + __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); + + assert(btos == 0, "change code, btos != 0"); + __ cbnz(flags, notByte); + + // btos + { + __ pop(btos); + if (!is_static) pop_and_check_object(obj); + __ strb(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notByte); + __ cmp(flags, ztos); + __ br(Assembler::NE, notBool); + + // ztos + { + __ pop(ztos); + if (!is_static) pop_and_check_object(obj); + __ andw(r0, r0, 0x1); + __ strb(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notBool); + __ cmp(flags, atos); + __ br(Assembler::NE, notObj); + + // atos + { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, _bs->kind(), false); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notObj); + __ cmp(flags, itos); + __ br(Assembler::NE, notInt); + + // itos + { + __ pop(itos); + if (!is_static) pop_and_check_object(obj); + __ strw(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notInt); + __ cmp(flags, ctos); + __ br(Assembler::NE, notChar); + + // ctos + { + __ pop(ctos); + if (!is_static) pop_and_check_object(obj); + __ strh(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notChar); + __ cmp(flags, stos); + __ br(Assembler::NE, notShort); + + // stos + { + __ pop(stos); + if (!is_static) pop_and_check_object(obj); + __ strh(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notShort); + __ cmp(flags, ltos); + __ br(Assembler::NE, notLong); + + // ltos + { + __ pop(ltos); + if (!is_static) pop_and_check_object(obj); + __ str(r0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notLong); + __ cmp(flags, ftos); + __ br(Assembler::NE, notFloat); + + // ftos + { + __ pop(ftos); + if (!is_static) pop_and_check_object(obj); + __ strs(v0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notFloat); +#ifdef ASSERT + __ cmp(flags, dtos); + __ br(Assembler::NE, notDouble); +#endif + + // dtos + { + __ pop(dtos); + if (!is_static) pop_and_check_object(obj); + __ strd(v0, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no); + } + } + +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) +{ + putfield_or_static(byte_no, false); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +void TemplateTable::jvmti_post_fast_field_mod() +{ + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
+ Label L2; + __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ ldrw(c_rarg3, Address(rscratch1)); + __ cbzw(c_rarg3, L2); + __ pop_ptr(r19); // copy the object pointer from tos + __ verify_oop(r19); + __ push_ptr(r19); // put the object pointer back on tos + // Save tos values before call_VM() clobbers them. Since we have + // to do it for every data type, we use the saved values as the + // jvalue object. + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(r0); break; + case Bytecodes::_fast_dputfield: __ push_d(); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(r0); break; + + default: + ShouldNotReachHere(); + } + __ mov(c_rarg3, esp); // points to jvalue on the stack + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, r0, 1); + __ verify_oop(r19); + // r19: object pointer copied above + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + r19, c_rarg2, c_rarg3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(r0); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(r0); break; + } + __ bind(L2); + } +} + +void TemplateTable::fast_storefield(TosState state) +{ + transition(state, vtos); + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, r1, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(MacroAssembler::LoadLoad); + + // test for volatile with r3 + __ ldrw(r3, Address(r2, in_bytes(base + + ConstantPoolCacheEntry::flags_offset()))); + + // replace index with field offset from cache entry + __ ldr(r1, Address(r2, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + + { + Label notVolatile; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + Label notVolatile; + + // Get object from stack + pop_and_check_object(r2); + + // field address + const Address field(r2, r1); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, field, r0, _bs->kind(), false); + break; + case Bytecodes::_fast_lputfield: + __ str(r0, field); + break; + case Bytecodes::_fast_iputfield: + __ strw(r0, field); + break; + case Bytecodes::_fast_zputfield: + __ andw(r0, r0, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ strb(r0, field); + break; + case Bytecodes::_fast_sputfield: + // fall through + case Bytecodes::_fast_cputfield: + __ strh(r0, field); + break; + case 
Bytecodes::_fast_fputfield: + __ strs(v0, field); + break; + case Bytecodes::_fast_dputfield: + __ strd(v0, field); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); + __ bind(notVolatile); + } +} + + +void TemplateTable::fast_accessfield(TosState state) +{ + transition(atos, state); + // Do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ ldrw(r2, Address(rscratch1)); + __ cbzw(r2, L1); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, rscratch2, 1); + __ verify_oop(r0); + __ push_ptr(r0); // save object pointer before call_VM() clobbers it + __ mov(c_rarg1, r0); + // c_rarg1: object pointer copied above + // c_rarg2: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2); + __ pop_ptr(r0); // restore object pointer + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, r1, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(MacroAssembler::LoadLoad); + + __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + + // r0: object + __ verify_oop(r0); + __ null_check(r0); + const Address field(r0, r1); + + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LDR;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and we interpret the loads in another. + if (! 
UseBarriersForVolatile) { + Label notVolatile; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: + __ load_heap_oop(r0, field); + __ verify_oop(r0); + break; + case Bytecodes::_fast_lgetfield: + __ ldr(r0, field); + break; + case Bytecodes::_fast_igetfield: + __ ldrw(r0, field); + break; + case Bytecodes::_fast_bgetfield: + __ load_signed_byte(r0, field); + break; + case Bytecodes::_fast_sgetfield: + __ load_signed_short(r0, field); + break; + case Bytecodes::_fast_cgetfield: + __ load_unsigned_short(r0, field); + break; + case Bytecodes::_fast_fgetfield: + __ ldrs(v0, field); + break; + case Bytecodes::_fast_dgetfield: + __ ldrd(v0, field); + break; + default: + ShouldNotReachHere(); + } + { + Label notVolatile; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); + } +} + +void TemplateTable::fast_xaccess(TosState state) +{ + transition(vtos, state); + + // get receiver + __ ldr(r0, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, r3, 2); + __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LDR;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and we interpret the loads in another. + if (! UseBarriersForVolatile) { + Label notVolatile; + __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ increment(rbcp); + __ null_check(r0); + switch (state) { + case itos: + __ ldrw(r0, Address(r0, r1, Address::lsl(0))); + break; + case atos: + __ load_heap_oop(r0, Address(r0, r1, Address::lsl(0))); + __ verify_oop(r0); + break; + case ftos: + __ ldrs(v0, Address(r0, r1, Address::lsl(0))); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + __ decrement(rbcp); +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) +{ + __ call_Unimplemented(); +} + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. 
+ Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == r3, ""); + assert(recv == noreg || recv == r2, ""); + + // setup registers & access constant pool cache + if (recv == noreg) recv = r2; + if (flags == noreg) flags = r3; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + // maybe push appendix to arguments (just before return address) + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + __ push(r19); + __ mov(r19, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, r19); + __ pop(r19); + __ push(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (note: no return address pushed yet) + if (load_receiver) { + __ andw(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); + // FIXME -- is this actually correct? looks like it should be 2 + // const int no_return_pc_pushed_yet = -1; // argument slot correction before we push return address + // const int receiver_is_at_end = -1; // back off one slot to get receiver + // Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + // __ movptr(recv, recv_addr); + __ add(rscratch1, esp, recv, ext::uxtx, 3); // FIXME: uxtb here? + __ ldr(recv, Address(rscratch1, -Interpreter::expr_offset_in_bytes(1))); + __ verify_oop(recv); + } + + // compute return type + // x86 uses a shift and mask or wings it with a shift plus assert + // the mask is not needed. 
aarch64 just uses bitfield extract + __ ubfxw(rscratch2, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); + // load return address + { + const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); + __ mov(rscratch1, table_addr); + __ ldr(lr, Address(rscratch1, rscratch2, Address::lsl(3))); + } +} + + +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) +{ + // Uses temporary registers r0, r3 + assert_different_registers(index, recv, r0, r3); + // Test for an invoke of a final method + Label notFinal; + __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal); + + const Register method = index; // method must be rmethod + assert(method == rmethod, + "methodOop must be rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + + // It's final, need a null check here! + __ null_check(recv); + + // profile this call + __ profile_final_call(r0); + __ profile_arguments_type(r0, method, r4, true); + + __ jump_from_interpreted(method, r0); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(r0, recv); + + // profile this call + __ profile_virtual_call(r0, rlocals, r3); + + // get target methodOop & entry point + __ lookup_virtual_method(r0, index, method); + __ profile_arguments_type(r3, method, r4, true); + // FIXME -- this looks completely redundant. is it? + // __ ldr(r3, Address(method, Method::interpreter_entry_offset())); + __ jump_from_interpreted(method, r3); +} + +void TemplateTable::invokevirtual(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod, noreg, r2, r3); + + // rmethod: index (actually a Method*) + // r2: receiver + // r3: flags + + invokevirtual_helper(rmethod, r2, r3); +} + +void TemplateTable::invokespecial(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod, noreg, // get f1 Method* + r2); // get receiver also for null check + __ verify_oop(r2); + __ null_check(r2); + // do the call + __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, rbcp, false); + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::invokestatic(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod); // get f1 Method* + // do the call + __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, r4, false); + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::fast_invokevfinal(int byte_no) +{ + __ call_Unimplemented(); +} + +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, r0, rmethod, // get f1 Klass*, f2 Method* + r2, r3); // recv, flags + + // r0: interface klass (from f1) + // rmethod: method (from f2) + // r2: receiver + // r3: flags + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCacheOop.cpp for details. + // This code isn't produced by javac, but could be produced by + // another compliant java compiler. 
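// For example (an illustrative case, not from the original comment): a
// class file may contain
//     invokeinterface java/lang/Comparable.hashCode()I
// for source such as  "Comparable c = ...; int h = c.hashCode();"  even
// though hashCode() is declared in java.lang.Object; such entries are
// flagged "forced virtual" in the cpCache, which is the path handled below.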
+ Label notMethod; + __ tbz(r3, ConstantPoolCacheEntry::is_forced_virtual_shift, notMethod); + + invokevirtual_helper(rmethod, r2, r3); + __ bind(notMethod); + + // Get receiver klass into r3 - also a null check + __ restore_locals(); + __ null_check(r2, oopDesc::klass_offset_in_bytes()); + __ load_klass(r3, r2); + + Label no_such_interface, no_such_method; + + // Receiver subtype check against REFC. + // Superklass in r0. Subklass in r3. Blows rscratch2, r13. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + r3, r0, noreg, + // outputs: scan temp. reg, scan temp. reg + rscratch2, r13, + no_such_interface, + /*return_method=*/false); + + // profile this call + __ profile_virtual_call(r3, r13, r19); + + // Get declaring interface class from method, and itable index + __ ldr(r0, Address(rmethod, Method::const_offset())); + __ ldr(r0, Address(r0, ConstMethod::constants_offset())); + __ ldr(r0, Address(r0, ConstantPool::pool_holder_offset_in_bytes())); + __ ldrw(rmethod, Address(rmethod, Method::itable_index_offset())); + __ subw(rmethod, rmethod, Method::itable_index_max); + __ negw(rmethod, rmethod); + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + r3, r0, rmethod, + // outputs: method, scan temp. reg + rmethod, r13, + no_such_interface); + + // rmethod,: methodOop to call + // r2: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ cbz(rmethod, no_such_method); + + __ profile_arguments_type(r3, rmethod, r13, true); + + // do the call + // r2: receiver + // rmethod,: methodOop + __ jump_from_interpreted(rmethod, r3); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; +} + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // rewriter does not generate this bytecode + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, rmethod, r0, r2); + __ verify_method_ptr(r2); + __ verify_oop(r2); + __ null_check(r2); + + // FIXME: profile the LambdaForm also + + // r13 is safe to use here as a scratch reg because it is about to + // be clobbered by jump_from_interpreted(). 
+ __ profile_final_call(r13); + __ profile_arguments_type(r13, rmethod, r4, true); + + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // We should not encounter this bytecode if !EnableInvokeDynamic. + // The verifier will stop it. However, if we get past the verifier, + // this will stop the thread in a reasonable way, without crashing the JVM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, rmethod, r0); + + // r0: CallSite object (from cpool->resolved_references[]) + // rmethod: MH.linkToCallSite method (from f2) + + // Note: r0_callsite is already pushed by prepare_invoke + + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(rbcp); + __ profile_arguments_type(r3, rmethod, r13, false); + + __ verify_oop(r0); + + __ jump_from_interpreted(rmethod, r0); +} + + +//----------------------------------------------------------------------------- +// Allocation + +void TemplateTable::_new() { + transition(vtos, atos); + + __ get_unsigned_2_byte_index_at_bcp(r3, 1); + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + __ get_cpool_and_tags(r4, r0); + // Make sure the class we're about to instantiate has been resolved. + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put) + const int tags_offset = Array::base_offset_in_bytes(); + __ lea(rscratch1, Address(r0, r3, Address::lsl(0))); + __ lea(rscratch1, Address(rscratch1, tags_offset)); + __ ldarb(rscratch1, rscratch1); + __ cmp(rscratch1, JVM_CONSTANT_Class); + __ br(Assembler::NE, slow_case); + + // get InstanceKlass + __ lea(r4, Address(r4, r3, Address::lsl(3))); + __ ldr(r4, Address(r4, sizeof(ConstantPool))); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ ldrb(rscratch1, Address(r4, InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + __ br(Assembler::NE, slow_case); + + // get instance_size in InstanceKlass (scaled to a count of bytes) + __ ldrw(r3, + Address(r4, + Klass::layout_helper_offset())); + // test to see if it has a finalizer or is malformed in some way + __ tbnz(r3, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; + + if (UseTLAB) { + __ tlab_allocate(r0, r3, 0, noreg, r1, + allow_shared_alloc ? allocate_shared : slow_case); + + if (ZeroTLAB) { + // the fields have been already cleared + __ b(initialize_header); + } else { + // initialize both the header and fields + __ b(initialize_object); + } + } + + // Allocation in the shared Eden, if allowed. 
+ // + // r3: instance size in bytes + if (allow_shared_alloc) { + __ bind(allocate_shared); + + __ eden_allocate(r0, r3, 0, r10, slow_case); + __ incr_allocated_bytes(rthread, r3, 0, rscratch1); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ sub(r3, r3, sizeof(oopDesc)); + __ cbz(r3, initialize_header); + + // Initialize object fields + { + __ add(r2, r0, sizeof(oopDesc)); + Label loop; + __ bind(loop); + __ str(zr, Address(__ post(r2, BytesPerLong))); + __ sub(r3, r3, BytesPerLong); + __ cbnz(r3, loop); + } + + // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset())); + } else { + __ mov(rscratch1, (intptr_t)markOopDesc::prototype()); + } + __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(r0, zr); // zero klass gap for compressed oops + __ store_klass(r0, r4); // store klass last + + { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), r0); + __ pop(atos); // restore the return value + + } + __ b(done); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(c_rarg1); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); + __ verify_oop(r0); + + // continue + __ bind(done); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ load_unsigned_byte(c_rarg1, at_bcp(1)); + __ mov(c_rarg2, r0); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), + c_rarg1, c_rarg2); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); + __ mov(c_rarg3, r0); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), + c_rarg1, c_rarg2, c_rarg3); + // Must prevent reordering of stores for object initialization with stores that publish the new object. 
+ __ membar(Assembler::StoreStore); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(r0, arrayOopDesc::length_offset_in_bytes()); + __ ldrw(r0, Address(r0, arrayOopDesc::length_offset_in_bytes())); +} + +void TemplateTable::checkcast() +{ + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ cbz(r0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ lea(r1, Address(rscratch1, r19)); + __ ldarb(r1, r1); + __ cmp(r1, JVM_CONSTANT_Class); + __ br(Assembler::EQ, quicked); + + __ push(atos); // save receiver for result, and for GC + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(r0, rthread); + __ pop(r3); // restore receiver + __ b(resolved); + + // Get superklass in r0 and subklass in r3 + __ bind(quicked); + __ mov(r3, r0); // Save object in r3; r0 needed for subtype check + __ lea(r0, Address(r2, r19, Address::lsl(3))); + __ ldr(r0, Address(r0, sizeof(ConstantPool))); + + __ bind(resolved); + __ load_klass(r19, r3); + + // Generate subtype check. Blows r2, r5. Object in r3. + // Superklass in r0. Subklass in r19. + __ gen_subtype_check(r19, ok_is_subtype); + + // Come here on failure + __ push(r3); + // object is at TOS + __ b(Interpreter::_throw_ClassCastException_entry); + + // Come here on success + __ bind(ok_is_subtype); + __ mov(r0, r3); // Restore object in r3 + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(r2); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); +} + +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ cbz(r0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ lea(r1, Address(rscratch1, r19)); + __ ldarb(r1, r1); + __ cmp(r1, JVM_CONSTANT_Class); + __ br(Assembler::EQ, quicked); + + __ push(atos); // save receiver for result, and for GC + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(r0, rthread); + __ pop(r3); // restore receiver + __ verify_oop(r3); + __ load_klass(r3, r3); + __ b(resolved); + + // Get superklass in r0 and subklass in r3 + __ bind(quicked); + __ load_klass(r3, r0); + __ lea(r0, Address(r2, r19, Address::lsl(3))); + __ ldr(r0, Address(r0, sizeof(ConstantPool))); + + __ bind(resolved); + + // Generate subtype check. Blows r2, r5 + // Superklass in r0. Subklass in r3. + __ gen_subtype_check(r3, ok_is_subtype); + + // Come here on failure + __ mov(r0, 0); + __ b(done); + // Come here on success + __ bind(ok_is_subtype); + __ mov(r0, 1); + + // Collect counts on whether this test sees NULLs a lot or not. 
+ if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(r2); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // r0 = 0: obj == NULL or obj is not an instanceof the specified klass + // r0 = 1: obj != NULL and obj is an instanceof the specified klass +} + +//----------------------------------------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(c_rarg1); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + c_rarg1, rbcp); + __ mov(r19, r0); + + // post the breakpoint event + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), + rmethod, rbcp); + + // complete the execution of original bytecode + __ mov(rscratch1, r19); + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(r0); + __ b(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- esp = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... +// [saved rbp ] <--- rbp +void TemplateTable::monitorenter() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(r0); + + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label allocated; + + // initialize entry pointer + __ mov(c_rarg1, zr); // points to free slot or NULL + + // find a free slot in the monitor block (result in c_rarg1) + { + Label entry, loop, exit; + __ ldr(c_rarg3, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(c_rarg2, monitor_block_bot); // points to word before bottom + + __ b(entry); + + __ bind(loop); + // check if current entry is used + // if not used then remember entry in c_rarg1 + __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); + __ cmp(zr, rscratch1); + __ csel(c_rarg1, c_rarg3, c_rarg1, Assembler::EQ); + // check if current entry is for same object + __ cmp(r0, rscratch1); + // if same object then stop searching + __ br(Assembler::EQ, exit); + // otherwise advance to next entry + __ add(c_rarg3, c_rarg3, entry_size); + __ bind(entry); + // check if bottom reached + __ cmp(c_rarg3, c_rarg2); + // if not at bottom then check this entry + __ br(Assembler::NE, loop); + __ bind(exit); + } + + __ cbnz(c_rarg1, allocated); // check if a slot has been found and + // if found, continue with that on + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. 
compute new pointers // rsp: old expression stack top + __ ldr(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom + __ sub(esp, esp, entry_size); // move expression stack top + __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom + __ mov(c_rarg3, esp); // set start value for copy loop + __ str(c_rarg1, monitor_block_bot); // set new monitor block bottom + + __ sub(sp, sp, entry_size); // make room for the monitor + + __ b(entry); + // 2. move expression stack contents + __ bind(loop); + __ ldr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack + // word from old location + __ str(c_rarg2, Address(c_rarg3, 0)); // and store it at new location + __ add(c_rarg3, c_rarg3, wordSize); // advance to next word + __ bind(entry); + __ cmp(c_rarg3, c_rarg1); // check if bottom reached + __ br(Assembler::NE, loop); // if not at bottom then + // copy next word + } + + // call run-time routine + // c_rarg1: points to monitor entry + __ bind(allocated); + + // Increment bcp to point to the next bytecode, so exception + // handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ increment(rbcp); + + // store object + __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ lock_object(c_rarg1); + + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to + // next instruction. + __ dispatch_next(vtos); +} + + +void TemplateTable::monitorexit() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(r0); + + const Address monitor_block_top( + rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + rfp, frame::interpreter_frame_initial_sp_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label found; + + // find matching slot + { + Label entry, loop; + __ ldr(c_rarg1, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(c_rarg2, monitor_block_bot); // points to word before bottom + // of monitor block + __ b(entry); + + __ bind(loop); + // check if current entry is for same object + __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ cmp(r0, rscratch1); + // if same object then stop searching + __ br(Assembler::EQ, found); + // otherwise advance to next entry + __ add(c_rarg1, c_rarg1, entry_size); + __ bind(entry); + // check if bottom reached + __ cmp(c_rarg1, c_rarg2); + // if not at bottom then check this entry + __ br(Assembler::NE, loop); + } + + // error handling. 
Unlocking was not block-structured + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + __ bind(found); + __ push_ptr(r0); // make sure object is on stack (contract with oopMaps) + __ unlock_object(c_rarg1); + __ pop_ptr(r0); // discard object +} + + +// Wide instructions +void TemplateTable::wide() +{ + __ load_unsigned_byte(r19, at_bcp(1)); + __ mov(rscratch1, (address)Interpreter::_wentry_point); + __ ldr(rscratch1, Address(rscratch1, r19, Address::uxtw(3))); + __ br(rscratch1); +} + + +// Multi arrays +void TemplateTable::multianewarray() { + transition(vtos, atos); + __ load_unsigned_byte(r0, at_bcp(3)); // get number of dimensions + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ lea(c_rarg1, Address(esp, r0, Address::uxtw(3))); + __ sub(c_rarg1, c_rarg1, wordSize); + call_VM(r0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), + c_rarg1); + __ load_unsigned_byte(r1, at_bcp(3)); + __ lea(esp, Address(esp, r1, Address::uxtw(3))); +} +#endif // !CC_INTERP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/templateTable_aarch64.hpp 2021-01-25 19:32:00.743728799 +0000 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP +#define CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP + +static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_AARCH64_VM_TEMPLATETABLE_AARCH64_64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vmStructs_aarch64.hpp 2021-01-25 19:32:01.179733382 +0000 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP +#define CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* JavaCallWrapper */ \ + /******************************/ \ + /******************************/ \ + /* JavaFrameAnchor */ \ + /******************************/ \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_AARCH64_VM_VMSTRUCTS_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vm_version_aarch64.cpp 2021-01-25 19:32:01.682738669 +0000 @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
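vmStructs_aarch64.hpp exports a single CPU-specific field, JavaFrameAnchor::_last_Java_fp, to the Serviceability Agent through VM_STRUCTS_CPU. As a rough illustration only (hypothetical FieldDescriptor type and field list, not the actual vmStructs.cpp machinery), such an entry boils down to an (owner, name, type, offset) tuple that the SA can use to read the field out of a target VM:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins: the real tables are generated by vmStructs.cpp.
struct JavaFrameAnchorModel {
  intptr_t* _last_Java_sp;
  intptr_t* _last_Java_fp;   // the field exported by VM_STRUCTS_CPU above
};

struct FieldDescriptor {
  const char* owner; const char* name; const char* type; size_t offset;
};

static const FieldDescriptor cpu_fields[] = {
  { "JavaFrameAnchor", "_last_Java_fp", "intptr_t*",
    offsetof(JavaFrameAnchorModel, _last_Java_fp) },
};

int main() {
  std::printf("%s::%s at offset %zu\n",
              cpu_fields[0].owner, cpu_fields[0].name, cpu_fields[0].offset);
}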
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/java.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "vm_version_aarch64.hpp"
+#ifdef TARGET_OS_FAMILY_linux
+# include "os_linux.inline.hpp"
+#endif
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#ifndef HWCAP_AES
+#define HWCAP_AES (1<<3)
+#endif
+
+#ifndef HWCAP_SHA1
+#define HWCAP_SHA1 (1<<5)
+#endif
+
+#ifndef HWCAP_SHA2
+#define HWCAP_SHA2 (1<<6)
+#endif
+
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1<<7)
+#endif
+
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1<<8)
+#endif
+
+int VM_Version::_cpu;
+int VM_Version::_model;
+int VM_Version::_model2;
+int VM_Version::_variant;
+int VM_Version::_revision;
+int VM_Version::_stepping;
+int VM_Version::_cpuFeatures;
+const char* VM_Version::_features_str = "";
+VM_Version::PsrInfo VM_Version::_psr_info = { 0, };
+
+static BufferBlob* stub_blob;
+static const int stub_size = 550;
+
+extern "C" {
+  typedef void (*getPsrInfo_stub_t)(void*);
+}
+static getPsrInfo_stub_t getPsrInfo_stub = NULL;
+
+
+class VM_Version_StubGenerator: public StubCodeGenerator {
+ public:
+
+  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+  address generate_getPsrInfo() {
+    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
+# define __ _masm->
+    address start = __ pc();
+
+    // void getPsrInfo(VM_Version::PsrInfo* psr_info);
+
+    address entry = __ pc();
+
+    __ enter();
+
+    __ get_dczid_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset())));
+
+    __ get_ctr_el0(rscratch1);
+    __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset())));
+
+    __ leave();
+    __ ret(lr);
+
+# undef __
+
+    return start;
+  }
+};
+
+
+void VM_Version::get_processor_features() {
+  _supports_cx8 = true;
+  _supports_atomic_getset4 = true;
+  _supports_atomic_getadd4 = true;
+  _supports_atomic_getset8 = true;
+  _supports_atomic_getadd8 = true;
+
+  getPsrInfo_stub(&_psr_info);
+
+  int dcache_line = VM_Version::dcache_line_size();
+
+  // Limit AllocatePrefetchDistance so that it does not exceed the
+  // constraint in AllocatePrefetchDistanceConstraintFunc.
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) + FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3*dcache_line)); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line); + if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) + FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line); + if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) + FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line); + + if (PrefetchCopyIntervalInBytes != -1 && + ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) { + warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768"); + PrefetchCopyIntervalInBytes &= ~7; + if (PrefetchCopyIntervalInBytes >= 32768) + PrefetchCopyIntervalInBytes = 32760; + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); + + unsigned long auxv = getauxval(AT_HWCAP); + + char buf[512]; + + strcpy(buf, "simd"); + if (auxv & HWCAP_CRC32) strcat(buf, ", crc"); + if (auxv & HWCAP_AES) strcat(buf, ", aes"); + if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); + if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); + if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); + + _features_str = strdup(buf); + _cpuFeatures = auxv; + + int cpu_lines = 0; + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[128], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { + long v = strtol(p+1, NULL, 0); + if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { + _cpu = v; + cpu_lines++; + } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { + _variant = v; + } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) { + if (_model != v) _model2 = _model; + _model = v; + } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { + _revision = v; + } + } + } + fclose(f); + } + + // Enable vendor specific features + if (_cpu == CPU_CAVIUM) { + if (_variant == 0) _cpuFeatures |= CPU_DMB_ATOMICS; + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } + if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { + FLAG_SET_DEFAULT(UseSIMDForMemoryOps, (_variant > 0)); + } + } + if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _cpuFeatures |= CPU_A53MAC; + if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _cpuFeatures |= CPU_STXR_PREFETCH; + // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07) + // we assume the worst and assume we could be on a big little system and have + // undisclosed A53 cores which we could be swapped to at any stage + if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _cpuFeatures |= CPU_A53MAC; + + if (FLAG_IS_DEFAULT(UseCRC32)) { + UseCRC32 = (auxv & HWCAP_CRC32) != 0; + } + if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) { + warning("UseCRC32 specified, but not supported on this CPU"); + } + + if (auxv & HWCAP_ATOMICS) { + if (FLAG_IS_DEFAULT(UseLSE)) + FLAG_SET_DEFAULT(UseLSE, true); + } else { + if (UseLSE) { + warning("UseLSE specified, but not supported on this CPU"); + } + } + + if (auxv & HWCAP_AES) { + UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); + UseAESIntrinsics = + UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); + if (UseAESIntrinsics && !UseAES) { + warning("UseAESIntrinsics enabled, but UseAES not, enabling"); + UseAES = true; + } + } else { + if (UseAES) { + warning("UseAES specified, but not supported on this CPU"); + } + if (UseAESIntrinsics) { + 
warning("UseAESIntrinsics specified, but not supported on this CPU"); + } + } + + if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + UseCRC32Intrinsics = true; + } + + if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (!UseSHA) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } else { + if (auxv & HWCAP_SHA1) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("SHA1 instruction is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + if (auxv & HWCAP_SHA2) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + if (UseSHA512Intrinsics) { + warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + } + + if (is_zva_enabled()) { + if (FLAG_IS_DEFAULT(UseBlockZeroing)) { + FLAG_SET_DEFAULT(UseBlockZeroing, true); + } + if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) { + FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * VM_Version::zva_length()); + } + } else if (UseBlockZeroing) { + warning("DC ZVA is not available on this CPU"); + FLAG_SET_DEFAULT(UseBlockZeroing, false); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + UseMultiplyToLenIntrinsic = true; + } + + if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) { + UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0; + } + + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + UsePopCountInstruction = true; + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(OptoScheduling)) { + OptoScheduling = true; + } +#else + if (ReservedCodeCacheSize > 128*M) { + vm_exit_during_initialization("client compiler does not support ReservedCodeCacheSize > 128M"); + } +#endif +} + +void VM_Version::initialize() { + ResourceMark rm; + + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); + } + + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, + g.generate_getPsrInfo()); + + get_processor_features(); +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vm_version_aarch64.hpp 2021-01-25 19:32:02.178743882 +0000 @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP +#define CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + +class VM_Version : public Abstract_VM_Version { +public: +protected: + static int _cpu; + static int _model; + static int _model2; + static int _variant; + static int _revision; + static int _stepping; + static int _cpuFeatures; // features returned by the "cpuid" instruction + // 0 if this instruction is not available + static const char* _features_str; + + struct PsrInfo { + uint32_t dczid_el0; + uint32_t ctr_el0; + }; + static PsrInfo _psr_info; + static void get_processor_features(); + +public: + // Initialization + static void initialize(); + + // Asserts + static void assert_is_initialized() { + } + + enum { + CPU_ARM = 'A', + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', + CPU_AMCC = 'P', + CPU_QUALCOM = 'Q', + CPU_MARVELL = 'V', + CPU_INTEL = 'i', + } cpuFamily; + + enum { + CPU_FP = (1<<0), + CPU_ASIMD = (1<<1), + CPU_EVTSTRM = (1<<2), + CPU_AES = (1<<3), + CPU_PMULL = (1<<4), + CPU_SHA1 = (1<<5), + CPU_SHA2 = (1<<6), + CPU_CRC32 = (1<<7), + CPU_LSE = (1<<8), + CPU_STXR_PREFETCH= (1 << 29), + CPU_A53MAC = (1 << 30), + CPU_DMB_ATOMICS = (1 << 31), + } cpuFeatureFlags; + + static const char* cpu_features() { return _features_str; } + static int cpu_family() { return _cpu; } + static int cpu_model() { return _model; } + static int cpu_variant() { return _variant; } + static int cpu_revision() { return _revision; } + static int cpu_cpuFeatures() { return _cpuFeatures; } + static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } + static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } + static bool is_zva_enabled() { + // Check the DZP bit (bit 4) of dczid_el0 is zero + // and block size (bit 0~3) is not zero. + return ((_psr_info.dczid_el0 & 0x10) == 0 && + (_psr_info.dczid_el0 & 0xf) != 0); + } + static int zva_length() { + assert(is_zva_enabled(), "ZVA not available"); + return 4 << (_psr_info.dczid_el0 & 0xf); + } + static int icache_line_size() { + return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4; + } + static int dcache_line_size() { + return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; + } +}; + +#endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vm_version_ext_aarch64.cpp 2021-01-25 19:32:02.639748727 +0000 @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
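The accessors at the end of vm_version_aarch64.hpp decode the two registers captured by getPsrInfo_stub: CTR_EL0 carries the I- and D-cache minimum line sizes (log2-encoded, in 4-byte words) and DCZID_EL0 says whether DC ZVA is permitted and how large its block is. Below is a pure-decoding sketch of the same arithmetic, fed with example register values rather than reads from hardware:

#include <cstdint>
#include <cstdio>

// Mirrors icache_line_size/dcache_line_size/is_zva_enabled/zva_length above.
static int  dcache_line_size(uint32_t ctr_el0) { return (1 << ((ctr_el0 >> 16) & 0x0f)) * 4; }
static int  icache_line_size(uint32_t ctr_el0) { return (1 << (ctr_el0 & 0x0f)) * 4; }
static bool zva_enabled(uint32_t dczid_el0)    { return (dczid_el0 & 0x10) == 0 && (dczid_el0 & 0x0f) != 0; }
static int  zva_length(uint32_t dczid_el0)     { return 4 << (dczid_el0 & 0x0f); }

int main() {
  uint32_t ctr   = 0x84448004;  // example: 64-byte D-cache and I-cache lines
  uint32_t dczid = 0x4;         // example: DZP clear, BS=4 -> 64-byte ZVA block
  std::printf("dcache=%d icache=%d zva=%d\n",
              dcache_line_size(ctr), icache_line_size(ctr),
              zva_enabled(dczid) ? zva_length(dczid) : 0);
}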
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "vm_version_ext_aarch64.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + int core_id = -1; + int chip_id = -1; + int len = 0; + char* src_string = NULL; + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "AArch64"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "AArch64 %s", cpu_features()); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vm_version_ext_aarch64.hpp 2021-01-25 19:32:03.097753541 +0000 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_VM_VERSION_EXT_AARCH64_HPP +#define CPU_AARCH64_VM_VM_VERSION_EXT_AARCH64_HPP + +#include "utilities/macros.hpp" +#include "vm_version_aarch64.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); + +}; + +#endif // CPU_AARCH64_VM_VM_VERSION_EXT_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vmreg_aarch64.cpp 2021-01-25 19:32:03.523758018 +0000 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vmreg_aarch64.hpp 2021-01-25 19:32:03.995762979 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. 
+ * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_VM_VMREG_AARCH64_HPP +#define CPU_AARCH64_VM_VMREG_AARCH64_HPP + + bool is_Register(); + Register as_Register(); + + bool is_FloatRegister(); + FloatRegister as_FloatRegister(); +#endif // CPU_AARCH64_VM_VMREG_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vmreg_aarch64.inline.hpp 2021-01-25 19:32:04.422767467 +0000 @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
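set_regName() in vmreg_aarch64.cpp stores every GPR and FPR name twice because each 64-bit register occupies two 32-bit VMReg slots; the inline conversions in vmreg_aarch64.inline.hpp that follow therefore scale the register encoding by two and treat only the even slot as "concrete". The toy model below shows just that slot arithmetic (the kGprs/kFprs constants are invented for illustration, not the HotSpot values):

#include <cassert>
#include <cstdio>

const int kGprs = 32, kFprs = 32;
const int kMaxGpr = kGprs * 2;            // slots 0 .. kMaxGpr-1 hold GPR halves
const int kMaxFpr = kMaxGpr + kFprs * 2;  // then the FPR halves

int  gpr_to_slot(int encoding)   { return encoding << 1; }               // as_VMReg
int  slot_to_gpr(int slot)       { assert(slot < kMaxGpr); return slot >> 1; }
int  fpr_to_slot(int encoding)   { return (encoding << 1) + kMaxGpr; }
bool slot_is_concrete(int slot)  { return (slot & 1) == 0; }             // even half only

int main() {
  std::printf("r17 -> slots %d,%d -> r%d (fpr slots end at %d)\n",
              gpr_to_slot(17), gpr_to_slot(17) + 1,
              slot_to_gpr(gpr_to_slot(17)), kMaxFpr - 1);
}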
+ * + */ + +#ifndef CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP +#define CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + + assert( is_Register(), "must be"); + // Yuk + return ::as_Register(value() >> 1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert( is_FloatRegister() && is_even(value()), "must be" ); + // Yuk + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + return is_even(value()); +} + +#endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp 2021-01-25 19:32:04.850771966 +0000 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2013, Red Hat Inc. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. + * All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "assembler_aarch64.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_aarch64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch64.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int aarch64_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. 
+ if (s == NULL) { + return NULL; + } + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), aarch64_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ lea(r16, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r16)); + } +#endif + + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(r16, j_rarg0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ldrw(rscratch1, Address(r16, InstanceKlass::vtable_length_offset() * wordSize)); + __ cmpw(rscratch1, vtable_index * vtableEntry::size()); + __ br(Assembler::GT, L); + __ enter(); + __ mov(r2, vtable_index); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + __ leave(); + __ bind(L); + } +#endif // PRODUCT + + __ lookup_virtual_method(r16, vtable_index, rmethod); + + if (DebugVtables) { + Label L; + __ cbz(rmethod, L); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ cbnz(rscratch1, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // r0: receiver klass + // rmethod: Method* + // r2: receiver + address ame_addr = __ pc(); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ br(rscratch1); + + __ flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", + vtable_index, p2i(s->entry_point()), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! + const int code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(code_length) VtableStub(false, itable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + __ incrementw(Address(r10)); + } +#endif + + // Entry arguments: + // rscratch2: CompiledICHolder + // j_rarg0: Receiver + + // Most registers are in use; we'll use r16, rmethod, r10, r11 + const Register recv_klass_reg = r10; + const Register holder_klass_reg = r16; // declaring interface klass (DECC) + const Register resolved_klass_reg = rmethod; // resolved interface klass (REFC) + const Register temp_reg = r11; + const Register icholder_reg = rscratch2; + + __ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); + __ ldr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + + Label L_no_such_interface; + + // get receiver klass (also an implicit null-check) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + address npe_addr = __ pc(); + __ load_klass(recv_klass_reg, j_rarg0); + + // Receiver subtype check against REFC. + // Destroys recv_klass_reg value. + __ lookup_interface_method(// inputs: rec. 
class, interface + recv_klass_reg, resolved_klass_reg, noreg, + // outputs: scan temp. reg1, scan temp. reg2 + recv_klass_reg, temp_reg, + L_no_such_interface, + /*return_method=*/false); + + // Get selected method from declaring class and itable index + __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg + __ lookup_interface_method(// inputs: rec. class, interface, itable index + recv_klass_reg, holder_klass_reg, itable_index, + // outputs: method, scan temp. reg + rmethod, temp_reg, + L_no_such_interface); + + // method (rmethod): Method* + // j_rarg0: receiver + +#ifdef ASSERT + if (DebugVtables) { + Label L2; + __ cbz(rmethod, L2); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ cbnz(rscratch1, L2); + __ stop("compiler entrypoint is null"); + __ bind(L2); + } +#endif // ASSERT + + // rmethod: Method* + // j_rarg0: receiver + address ame_addr = __ pc(); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ br(rscratch1); + + __ bind(L_no_such_interface); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + + __ flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", + itable_index, p2i(s->entry_point()), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + int size = DebugVtables ? 216 : 0; + if (CountCompiledCalls) + size += 6 * 4; + // FIXME: vtable stubs only need 36 bytes + if (is_vtable_stub) + size += 52; + else + size += 176; + return size; + + // In order to tune these parameters, run the JVM with VM options + // +PrintMiscellaneous and +WizardMode to see information about + // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. + // + // If Universe::narrow_klass_base is nonzero, decoding a compressed + // class can take several instructions. + // + // The JVM98 app. _202_jess has a megamorphic interface call. 
+ // The itable code looks like this: + + // ldr xmethod, [xscratch2,#CompiledICHolder::holder_klass_offset] + // ldr x0, [xscratch2] + // ldr w10, [x1,#oopDesc::klass_offset_in_bytes] + // mov xheapbase, #0x3c000000 // #narrow_klass_base + // movk xheapbase, #0x3f7, lsl #32 + // add x10, xheapbase, x10 + // mov xheapbase, #0xe7ff0000 // #heapbase + // movk xheapbase, #0x3f7, lsl #32 + // ldr w11, [x10,#vtable_length_offset] + // add x11, x10, x11, uxtx #3 + // add x11, x11, #itableMethodEntry::method_offset_in_bytes + // ldr x10, [x11] + // cmp xmethod, x10 + // b.eq success + // search: + // cbz x10, no_such_interface + // add x11, x11, #0x10 + // ldr x10, [x11] + // cmp xmethod, x10 + // b.ne search + // success: + // ldr w10, [x1,#oopDesc::klass_offset_in_bytes] + // mov xheapbase, #0x3c000000 // #narrow_klass_base + // movk xheapbase, #0x3f7, lsl #32 + // add x10, xheapbase, x10 + // mov xheapbase, #0xe7ff0000 // #heapbase + // movk xheapbase, #0x3f7, lsl #32 + // ldr w11, [x10,#vtable_length_offset] + // add x11, x10, x11, uxtx #3 + // add x11, x11, #itableMethodEntry::method_offset_in_bytes + // add x10, x10, #itentry_off + // ldr xmethod, [x11] + // cmp x0, xmethod + // b.eq found_method + // search2: + // cbz xmethod, 0x000003ffa872e6cc + // add x11, x11, #0x10 + // ldr xmethod, [x11] + // cmp x0, xmethod + // b.ne search2 + // ldr w11, [x11,#itableOffsetEntry::offset_offset_in_bytes] + // ldr xmethod, [x10,w11,uxtw] + // ldr xscratch1, [xmethod,#Method::from_compiled_offset] + // br xscratch1 + // no_such_interface: + // b throw_ICCE_entry + +} + +int VtableStub::pd_code_alignment() { return 4; } --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/assembler_linux_aarch64.cpp 2021-01-25 19:32:05.329777000 +0000 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/threadLocalStorage.hpp" + + +// get_thread can be called anywhere inside generated code so we need +// to save whatever non-callee save context might get clobbered by the +// call to the C thread_local lookup call or, indeed, the call setup +// code. x86 appears to save C arg registers. + +void MacroAssembler::get_thread(Register dst) { + // call pthread_getspecific + // void * pthread_getspecific(pthread_key_t key); + + // Save all call-clobbered regs except dst, plus r19 and r20. 
+ RegSet saved_regs = RegSet::range(r0, r20) + lr - dst; + push(saved_regs, sp); + mov(c_rarg0, ThreadLocalStorage::thread_index()); + mov(r19, CAST_FROM_FN_PTR(address, pthread_getspecific)); + blr(r19); + if (dst != c_rarg0) { + mov(dst, c_rarg0); + } + // restore pushed registers + pop(saved_regs, sp); +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp 2021-01-25 19:32:05.779781730 +0000 @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_aarch64.hpp" + +// Implementation of class atomic + +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; } + + +inline jint Atomic::add(jint add_value, volatile jint* dest) +{ + return __sync_add_and_fetch(dest, add_value); +} + +inline void Atomic::inc(volatile jint* dest) +{ + add(1, dest); +} + +inline void Atomic::inc_ptr(volatile void* dest) +{ + add_ptr(1, dest); +} + +inline void Atomic::dec (volatile jint* dest) +{ + add(-1, dest); +} + +inline void Atomic::dec_ptr(volatile void* dest) +{ + add_ptr(-1, dest); +} + +inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) +{ + jint res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; +} + 
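Atomic::xchg above is built on __sync_lock_test_and_set, which GCC documents as an acquire barrier only, so the code issues FULL_MEM_BARRIER (__sync_synchronize) afterwards to give xchg the full-fence semantics HotSpot expects; __sync_val_compare_and_swap used by cmpxchg is already a full barrier. The same idiom on a plain int, used as a trivial test-and-set lock (standalone sketch, not HotSpot code):

#include <cstdio>

// Exchange with full-fence semantics: acquire from the builtin, then a full
// barrier, matching the Atomic::xchg pattern above.
static int exchange(volatile int* dest, int value) {
  int old = __sync_lock_test_and_set(dest, value);
  __sync_synchronize();
  return old;
}

int main() {
  volatile int lock = 0;
  if (exchange(&lock, 1) == 0) {    // 0 -> 1 transition means we own the lock
    std::puts("lock acquired");
    __sync_lock_release(&lock);     // release store back to 0
  }
}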
+inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) +{ + return (void *) xchg_ptr((intptr_t) exchange_value, + (volatile intptr_t*) dest); +} + +inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) +{ + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); +} + +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } + +inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) +{ + return __sync_add_and_fetch(dest, add_value); +} + +inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) +{ + return (void *) add_ptr(add_value, (volatile intptr_t *) dest); +} + +inline void Atomic::inc_ptr(volatile intptr_t* dest) +{ + add_ptr(1, dest); +} + +inline void Atomic::dec_ptr(volatile intptr_t* dest) +{ + add_ptr(-1, dest); +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) +{ + intptr_t res = __sync_lock_test_and_set (dest, exchange_value); + FULL_MEM_BARRIER; + return res; +} + +inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) +{ + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); +} + +inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) +{ + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); +} + +inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) +{ + return (void *) cmpxchg_ptr((intptr_t) exchange_value, + (volatile intptr_t*) dest, + (intptr_t) compare_value); +} + +inline jlong Atomic::load(volatile jlong* src) { return *src; } + +#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/bytes_linux_aarch64.inline.hpp 2021-01-25 19:32:06.224786407 +0000 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. 
+inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + +inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + +inline u8 Bytes::swap_u8(u8 x) { + return bswap_64(x); +} + +#endif // OS_CPU_LINUX_AARCH64_VM_BYTES_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp 2021-01-25 19:32:06.668791074 +0000 @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP + +#define COPY_SMALL(from, to, count) \ +{ \ + long tmp0, tmp1, tmp2, tmp3; \ + long tmp4, tmp5, tmp6, tmp7; \ + __asm volatile( \ +" adr %[t0], 0f;" \ +" add %[t0], %[t0], %[cnt], lsl #6;" \ +" br %[t0];" \ +" .align 6;" \ +"0:" \ +" b 1f;" \ +"8:" \ +" ldr %[t0], [%[s], #0];" \ +" ldp %[t1], %[t2], [%[s], #8];" \ +" ldp %[t3], %[t4], [%[s], #24];" \ +" ldr %[t5], [%[s], #40];" \ +" tbz %[d], #3, 10f;" \ +"9:" \ +" str %[t0], [%[d], #0];" \ +" stp %[t1], %[t2], [%[d], #8];" \ +" stp %[t3], %[t4], [%[d], #24];" \ +" str %[t5], [%[d], #40];" \ +" b 1f;" \ +" .align 6;" \ +" ldr %[t0], [%[s], #0];" \ +" str %[t0], [%[d], #0];" \ +" b 1f;" \ +"2:" \ +" ldr %[t0], [%[s], #0];" \ +" ldp %[t1], %[t2], [%[s], #8];" \ +" ldp %[t3], %[t4], [%[s], #24];" \ +" ldp %[t5], %[t6], [%[s], #40];" \ +" ldr %[t7], [%[s], #56];" \ +" tbz %[d], #3, 4f;" \ +"3:" \ +" str %[t0], [%[d], #0];" \ +" stp %[t1], %[t2], [%[d], #8];" \ +" stp %[t3], %[t4], [%[d], #24];" \ +" stp %[t5], %[t6], [%[d], #40];" \ +" str %[t7], [%[d], #56];" \ +" b 1f;" \ +" .align 6;" \ +" ldr %[t0], [%[s], #0];" \ +" ldr %[t1], [%[s], #8];" \ +" str %[t0], [%[d], #0];" \ +" str %[t1], [%[d], #8];" \ +" b 1f;" \ +"5:" \ +" ldr %[t0], [%[s], #0];" \ +" ldp %[t1], %[t2], [%[s], #8];" \ +" ldp %[t3], %[t4], [%[s], #24];" \ +" ldp %[t5], %[t6], [%[s], #40];" \ +" tbz %[d], #3, 7f;" \ +"6:" \ +" str %[t0], [%[d], #0];" \ +" stp %[t1], %[t2], [%[d], #8];" \ +" stp %[t3], %[t4], [%[d], #24];" \ +" stp %[t5], %[t6], [%[d], #40];" \ +" b 1f;" \ +" .align 6;" \ +" ldr %[t0], [%[s], #0];" \ +" ldr %[t1], [%[s], #8];" \ +" ldr %[t2], [%[s], #16];" \ +" str %[t0], [%[d], #0];" \ +" str %[t1], [%[d], #8];" \ +" str %[t2], [%[d], #16];" \ +" b 1f;" \ +" .align 6;" \ +" ldr %[t0], [%[s], #0];" \ +" ldr %[t1], [%[s], #8];" \ +" ldr %[t2], [%[s], #16];" \ +" ldr %[t3], [%[s], #24];" \ +" str %[t0], [%[d], #0];" \ +" str %[t1], [%[d], #8];" \ +" str %[t2], 
[%[d], #16];" \ +" str %[t3], [%[d], #24];" \ +" b 1f;" \ +" .align 6;" \ +" ldr %[t0], [%[s], #0];" \ +" ldr %[t1], [%[s], #8];" \ +" ldr %[t2], [%[s], #16];" \ +" ldr %[t3], [%[s], #24];" \ +" ldr %[t4], [%[s], #32];" \ +" str %[t0], [%[d], #0];" \ +" str %[t1], [%[d], #8];" \ +" str %[t2], [%[d], #16];" \ +" str %[t3], [%[d], #24];" \ +" str %[t4], [%[d], #32];" \ +" b 1f;" \ +" .align 6;" \ +" tbnz %[s], #3, 8b;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldp %[t4], %[t5], [%[s], #32];" \ +" tbnz %[d], #3, 9b;" \ +"10:" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" stp %[t4], %[t5], [%[d], #32];" \ +" b 1f;" \ +" .align 6;" \ +" tbnz %[s], #3, 5b;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldp %[t4], %[t5], [%[s], #32];" \ +" ldr %[t6], [%[s], #48];" \ +" tbnz %[d], #3, 6b;" \ +"7:" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" stp %[t4], %[t5], [%[d], #32];" \ +" str %[t6], [%[d], #48];" \ +" b 1f;" \ +" .align 6;" \ +" tbnz %[s], #3, 2b;" \ +" ldp %[t0], %[t1], [%[s], #0];" \ +" ldp %[t2], %[t3], [%[s], #16];" \ +" ldp %[t4], %[t5], [%[s], #32];" \ +" ldp %[t6], %[t7], [%[s], #48];" \ +" tbnz %[d], #3, 3b;" \ +"4:" \ +" stp %[t0], %[t1], [%[d], #0];" \ +" stp %[t2], %[t3], [%[d], #16];" \ +" stp %[t4], %[t5], [%[d], #32];" \ +" stp %[t6], %[t7], [%[d], #48];" \ +"1:" \ + : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count), \ + [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \ + [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7) \ + : \ + : "memory", "cc"); \ +} + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; + } + _Copy_conjoint_words(from, to, count); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + if (__builtin_constant_p(count)) { + memcpy(to, from, count * sizeof(HeapWord)); + return; + } + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; + } + _Copy_disjoint_words(from, to, count); +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory"); + if (__builtin_expect(count <= 8, 1)) { + COPY_SMALL(from, to, count); + return; + } + _Copy_disjoint_words(from, to, count); +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + _Copy_conjoint_jshorts_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + _Copy_conjoint_jints_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + _Copy_conjoint_jlongs_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, 
oop* to, size_t count) { + assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + _Copy_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_bytes(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jshorts(from, to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jints(from, to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jlongs(from, to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + _Copy_arrayof_conjoint_jlongs(from, to, count); +} + +#endif // OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s 2021-01-25 19:32:07.081795415 +0000 @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2016, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + .global _Copy_conjoint_words + .global _Copy_disjoint_words + +s .req x0 +d .req x1 +count .req x2 +t0 .req x3 +t1 .req x4 +t2 .req x5 +t3 .req x6 +t4 .req x7 +t5 .req x8 +t6 .req x9 +t7 .req x10 + + .align 6 +_Copy_disjoint_words: + // Ensure 2 word aligned + tbz s, #3, fwd_copy_aligned + ldr t0, [s], #8 + str t0, [d], #8 + sub count, count, #1 + +fwd_copy_aligned: + ldp t0, t1, [s, #0] + ldp t2, t3, [s, #16] + ldp t4, t5, [s, #32] + ldp t6, t7, [s, #48]! // Source now biased by -16 + + tbnz d, #3, unal_fwd_copy + sub d, d, #16 // and bias dest + + subs count, count, #16 + blo fwd_copy_drain + +fwd_copy_again: + prfm pldl1keep, [s, #256] + stp t0, t1, [d, #16] + ldp t0, t1, [s, #16] + stp t2, t3, [d, #32] + ldp t2, t3, [s, #32] + stp t4, t5, [d, #48] + ldp t4, t5, [s, #48] + stp t6, t7, [d, #64]! + ldp t6, t7, [s, #64]! + subs count, count, #8 + bhs fwd_copy_again + +fwd_copy_drain: + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + stp t6, t7, [d, #64]! 
+ + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #16] // -7 == 1 word + str t0, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -6 = 2 words + stp t0, t1, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -5 = 3 words + ldr t2, [s, #32] + stp t0, t1, [d, #16] + str t2, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -4 = 4 words + ldp t2, t3, [s, #32] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -3 = 5 words + ldp t2, t3, [s, #32] + ldr t4, [s, #48] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + str t4, [d, #48] + ret + .align 5 + ldp t0, t1, [s, #16] // -2 = 6 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + ret + .align 5 + ldp t0, t1, [s, #16] // -1 = 7 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + ldr t6, [s, #64] + stp t0, t1, [d, #16] + stp t2, t3, [d, #32] + stp t4, t5, [d, #48] + str t6, [d, #64] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. + .align 5 +0: + ret + +unal_fwd_copy: + // Bias dest so we only pre index on the last copy + sub d, d, #8 + subs count, count, #16 + blo unal_fwd_copy_drain + +unal_fwd_copy_again: + prfm pldl1keep, [s, #256] + str t0, [d, #8] + stp t1, t2, [d, #16] + ldp t0, t1, [s, #16] + stp t3, t4, [d, #32] + ldp t2, t3, [s, #32] + stp t5, t6, [d, #48] + ldp t4, t5, [s, #48] + str t7, [d, #64]! + ldp t6, t7, [s, #64]! + subs count, count, #8 + bhs unal_fwd_copy_again + +unal_fwd_copy_drain: + str t0, [d, #8] + stp t1, t2, [d, #16] + stp t3, t4, [d, #32] + stp t5, t6, [d, #48] + str t7, [d, #64]! + + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #16] // -7 == 1 word + str t0, [d, #8] + ret + .align 5 + ldp t0, t1, [s, #16] // -6 = 2 words + str t0, [d, #8] + str t1, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -5 = 3 words + ldr t2, [s, #32] + str t0, [d, #8] + stp t1, t2, [d, #16] + ret + .align 5 + ldp t0, t1, [s, #16] // -4 = 4 words + ldp t2, t3, [s, #32] + str t0, [d, #8] + stp t1, t2, [d, #16] + str t3, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -3 = 5 words + ldp t2, t3, [s, #32] + ldr t4, [s, #48] + str t0, [d, #8] + stp t1, t2, [d, #16] + stp t3, t4, [d, #32] + ret + .align 5 + ldp t0, t1, [s, #16] // -2 = 6 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + str t0, [d, #8] + stp t1, t2, [d, #16] + stp t3, t4, [d, #32] + str t5, [d, #48] + ret + .align 5 + ldp t0, t1, [s, #16] // -1 = 7 words + ldp t2, t3, [s, #32] + ldp t4, t5, [s, #48] + ldr t6, [s, #64] + str t0, [d, #8] + stp t1, t2, [d, #16] + stp t3, t4, [d, #32] + stp t5, t6, [d, #48] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. + .align 5 +0: + ret + + .align 6 +_Copy_conjoint_words: + sub t0, d, s + cmp t0, count, lsl #3 + bhs _Copy_disjoint_words + + add s, s, count, lsl #3 + add d, d, count, lsl #3 + + // Ensure 2 word aligned + tbz s, #3, bwd_copy_aligned + ldr t0, [s, #-8]! + str t0, [d, #-8]! + sub count, count, #1 + +bwd_copy_aligned: + ldp t0, t1, [s, #-16] + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + ldp t6, t7, [s, #-64]! 
+ + tbnz d, #3, unal_bwd_copy + + subs count, count, #16 + blo bwd_copy_drain + +bwd_copy_again: + prfum pldl1keep, [s, #-256] + stp t0, t1, [d, #-16] + ldp t0, t1, [s, #-16] + stp t2, t3, [d, #-32] + ldp t2, t3, [s, #-32] + stp t4, t5, [d, #-48] + ldp t4, t5, [s, #-48] + stp t6, t7, [d, #-64]! + ldp t6, t7, [s, #-64]! + subs count, count, #8 + bhs bwd_copy_again + +bwd_copy_drain: + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + stp t6, t7, [d, #-64]! + + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #-8] // -7 == 1 word + str t0, [d, #-8] + ret + .align 5 + ldp t0, t1, [s, #-16] // -6 = 2 words + stp t0, t1, [d, #-16] + ret + .align 5 + ldp t0, t1, [s, #-16] // -5 = 3 words + ldr t2, [s, #-24] + stp t0, t1, [d, #-16] + str t2, [d, #-24] + ret + .align 5 + ldp t0, t1, [s, #-16] // -4 = 4 words + ldp t2, t3, [s, #-32] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + ret + .align 5 + ldp t0, t1, [s, #-16] // -3 = 5 words + ldp t2, t3, [s, #-32] + ldr t4, [s, #-40] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + str t4, [d, #-40] + ret + .align 5 + ldp t0, t1, [s, #-16] // -2 = 6 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + ret + .align 5 + ldp t0, t1, [s, #-16] // -1 = 7 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + ldr t6, [s, #-56] + stp t0, t1, [d, #-16] + stp t2, t3, [d, #-32] + stp t4, t5, [d, #-48] + str t6, [d, #-56] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. + .align 5 +0: + ret + +unal_bwd_copy: + subs count, count, #16 + blo unal_bwd_copy_drain + +unal_bwd_copy_again: + prfm pldl1keep, [s, #-256] + str t1, [d, #-8] + stp t3, t0, [d, #-24] + ldp t0, t1, [s, #-16] + stp t5, t2, [d, #-40] + ldp t2, t3, [s, #-32] + stp t7, t4, [d, #-56] + ldp t4, t5, [s, #-48] + str t6, [d, #-64]! + ldp t6, t7, [s, #-64]! + subs count, count, #8 + bhs unal_bwd_copy_again + +unal_bwd_copy_drain: + str t1, [d, #-8] + stp t3, t0, [d, #-24] + stp t5, t2, [d, #-40] + stp t7, t4, [d, #-56] + str t6, [d, #-64]! + + // count is now -8..-1 for 0..7 words to copy + adr t0, 0f + add t0, t0, count, lsl #5 + br t0 + + .align 5 + ret // -8 == 0 words + .align 5 + ldr t0, [s, #-8] // -7 == 1 word + str t0, [d, #-8] + ret + .align 5 + ldp t0, t1, [s, #-16] // -6 = 2 words + str t1, [d, #-8] + str t0, [d, #-16] + ret + .align 5 + ldp t0, t1, [s, #-16] // -5 = 3 words + ldr t2, [s, #-24] + str t1, [d, #-8] + stp t2, t0, [d, #-24] + ret + .align 5 + ldp t0, t1, [s, #-16] // -4 = 4 words + ldp t2, t3, [s, #-32] + str t1, [d, #-8] + stp t3, t0, [d, #-24] + str t2, [d, #-32] + ret + .align 5 + ldp t0, t1, [s, #-16] // -3 = 5 words + ldp t2, t3, [s, #-32] + ldr t4, [s, #-40] + str t1, [d, #-8] + stp t3, t0, [d, #-24] + stp t4, t2, [d, #-40] + ret + .align 5 + ldp t0, t1, [s, #-16] // -2 = 6 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + str t1, [d, #-8] + stp t3, t0, [d, #-24] + stp t5, t2, [d, #-40] + str t4, [d, #-48] + ret + .align 5 + ldp t0, t1, [s, #-16] // -1 = 7 words + ldp t2, t3, [s, #-32] + ldp t4, t5, [s, #-48] + ldr t6, [s, #-56] + str t1, [d, #-8] + stp t3, t0, [d, #-24] + stp t5, t2, [d, #-40] + stp t6, t4, [d, #-56] + // Is always aligned here, code for 7 words is one instruction + // too large so it just falls through. 
+ .align 5 +0: + ret --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp 2021-01-25 19:32:07.548800323 +0000 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP +#define OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 2048); + +define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +extern __thread Thread *aarch64_currentThread; + +#endif // OS_CPU_LINUX_AARCH64_VM_GLOBALS_LINUX_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/linux_aarch64.ad 2021-01-25 19:32:07.999805063 +0000 @@ -0,0 +1,68 @@ +// +// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// AMD64 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. 
Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. +encode %{ + // Build emit functions for each basic byte or larger field in the intel + // encoding scheme (opcode, rm, sib, immediate), and call them from C++ + // code in the enc_class source block. Emit functions will live in the + // main source block for now. In future, we can generalize this by + // adding a syntax that specifies the sizes of fields in an order, + // so that the adlc can build the emit functions automagically + + enc_class Java_To_Runtime(method meth) %{ + %} + +%} + + +// Platform dependent source + +source %{ + +%} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp 2021-01-25 19:32:08.451809814 +0000 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright 2007, 2008, 2009 Red Hat, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP + +#include "runtime/atomic.inline.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_aarch64.hpp" + +// Implementation of class OrderAccess. 
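The mappings that follow implement HotSpot's ordering primitives with AArch64 barriers (READ_MEM_BARRIER, WRITE_MEM_BARRIER, FULL_MEM_BARRIER) and with GCC's generic __atomic_load/__atomic_store built-ins for the acquire/release accessors. As a minimal illustration of how such an acquire-load pairs with a release-store, here is a self-contained sketch using the same built-ins; it is not part of the patch (compile with g++ -pthread):

    #include <thread>

    static int payload;   // data published by one thread, read by another
    static int ready;     // publication flag

    static void publisher() {
      payload = 42;                                    // ordinary store
      int one = 1;
      __atomic_store(&ready, &one, __ATOMIC_RELEASE);  // release: the payload store cannot be reordered after this
    }

    int main() {
      std::thread t(publisher);
      int seen = 0;
      do {
        __atomic_load(&ready, &seen, __ATOMIC_ACQUIRE); // acquire: pairs with the release-store above
      } while (seen == 0);
      int value = payload;                              // guaranteed to observe 42
      t.join();
      return value == 42 ? 0 : 1;
    }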
+ +inline void OrderAccess::loadload() { acquire(); } +inline void OrderAccess::storestore() { release(); } +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} + +inline void OrderAccess::release() { + WRITE_MEM_BARRIER; +} + +inline void OrderAccess::fence() { + FULL_MEM_BARRIER; +} + +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) +{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) +{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jint OrderAccess::load_acquire(volatile jint* p) +{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) +{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) +{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) +{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline juint OrderAccess::load_acquire(volatile juint* p) +{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline julong OrderAccess::load_acquire(volatile julong* p) +{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) +{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) +{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) +{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } +inline void* OrderAccess::load_ptr_acquire(volatile void* p) +{ void* data; __atomic_load((void* volatile *)p, &data, __ATOMIC_ACQUIRE); return data; } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) +{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; } + +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jint* p, jint v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile juint* p, juint v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile julong* p, julong v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) +{ __atomic_store(p, &v, __ATOMIC_RELEASE); } +inline void OrderAccess::release_store_ptr(volatile void* p, void* 
v) +{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); } + +inline void OrderAccess::store_fence(jbyte* p, jbyte v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) +{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); } + +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } + +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } + +#endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp 2021-01-25 19:32:08.892814449 +0000 @@ -0,0 +1,657 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_FP 29 + +#define NOINLINE __attribute__ ((noinline)) + +NOINLINE address os::current_stack_pointer() { + return (address)__builtin_frame_address(0); +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) 0xffffffffffff; +} + +void os::initialize_thread(Thread *thr) { +} + +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + return (address)uc->uc_mcontext.pc; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.sp; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.regs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
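The accessors above read the interrupted pc, sp and frame pointer (x29, REG_FP) directly from uc_mcontext, and the fetch_frame_from_ucontext()/fetch_frame_from_context() pair below packages them into an ExtendedPC plus sp/fp for frame construction. A standalone sketch of the same extraction in a SIGPROF handler, illustrative only and specific to the Linux/AArch64 sigcontext layout used here (the fprintf is for the demo; a real handler would only record the values):

    #include <csignal>
    #include <cstdio>
    #include <ucontext.h>

    static void prof_handler(int, siginfo_t*, void* ucVoid) {
      ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
      std::fprintf(stderr, "pc=%#lx sp=%#lx fp=%#lx\n",
                   (unsigned long) uc->uc_mcontext.pc,
                   (unsigned long) uc->uc_mcontext.sp,
                   (unsigned long) uc->uc_mcontext.regs[29]);  // x29 is the AArch64 frame pointer
    }

    int main() {
      struct sigaction sa = {};
      sa.sa_sigaction = prof_handler;
      sa.sa_flags = SA_SIGINFO;
      sigemptyset(&sa.sa_mask);
      sigaction(SIGPROF, &sa, nullptr);
      raise(SIGPROF);   // drive one pass through the handler
      return 0;
    }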
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +// By default, gcc always saves frame pointer rfp on this stack. This +// may get turned off by -fomit-frame-pointer. +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->link(), fr->link(), fr->sender_pc()); +} + +NOINLINE frame os::current_frame() { + intptr_t *fp = *(intptr_t **)__builtin_frame_address(0); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +// Utility functions + +// An operation in Unsafe has faulted. We're going to return to the +// instruction after the faulting load or store. We also set +// pending_unsafe_access_error so that at some point in the future our +// user will get a helpful message. +static address handle_unsafe_access(JavaThread* thread, address pc) { + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = pc + NativeCall::instruction_size; + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::ThreadCrashProtection::check_crash_protection(sig, t); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. 
+ + if (sig == SIGPIPE || sig == SIGXFSZ) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + char buf[64]; + warning("Ignoring %s - see bugs 4229104 or 646499219", + os::exception_name(sig, buf, sizeof(buf))); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ + vmthread = (VMThread *)t; + } + } + } +/* + NOTE: does not seem to work on linux. + if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) { + // can't decode this kind of signal + info = NULL; + } else { + assert(sig == info->si_signo, "bad siginfo"); + } +*/ + // decide if this trap can be handled by a stub + address stub = NULL; + + address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + + if (StubRoutines::is_safefetch_fault(pc)) { + uc->uc_mcontext.pc = intptr_t(StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + +#ifndef AMD64 + // Halt if SI_KERNEL before more crashes get misdiagnosed as Java bugs + // This can happen in any running code (currently more frequently in + // interpreter code but has been seen in compiled code) + if (sig == SIGSEGV && info->si_addr == 0 && info->si_code == SI_KERNEL) { + fatal("An irrecoverable SI_KERNEL SIGSEGV has occurred due " + "to unstable signal handling in this distribution."); + } +#endif // AMD64 + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; + + // check if fault address is within thread stack + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow + if (thread->in_stack_yellow_zone(addr)) { + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. 
expanding stack."); + } + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + + // Handle signal from NativeJump::patch_verified_entry(). + if ((sig == SIGILL || sig == SIGTRAP) + && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { + if (TraceTraps) { + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL; + if (nm != NULL && nm->has_unsafe_access()) { + stub = handle_unsafe_access(thread, pc); + } + } + else + + if (sig == SIGFPE && + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, + pc, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { + stub = handle_unsafe_access(thread, pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { + // Block current thread until the memory serialize page permission restored. 
+ os::block_on_serialize_page_trap(); + return true; + } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + uc->uc_mcontext.pc = (__u64)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } + + if (!abort_if_unrecognized) { + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); + + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { +} + +int os::Linux::get_fpu_control_word(void) { + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { +} + +// Check that the linux kernel version is 2.4 or higher since earlier +// versions do not support SSE without patches. +bool os::supports_sse() { + return true; +} + +bool os::is_allocatable(size_t bytes) { + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 64 * K; + +// amd64: pthread on amd64 is always in floating stack mode +bool os::Linux::supports_variable_stack_size() { return true; } + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // Creating guard page is very expensive. Java thread has HotSpot + // guard page, only enable glibc guard page for non-Java threads. + return (thr_type == java_thread ? 0 : page_size()); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. 
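For every other thread, the else-branch of current_stack_region() just below obtains the region from pthread_getattr_np() and pthread_attr_getstack(), which return exactly the P1 (bottom) and size values described in the diagram above. A standalone sketch of that query, illustrative only (Linux/glibc-specific; build with g++ -pthread):

    #include <cstdio>
    #include <cstdlib>
    #include <pthread.h>

    // Query the current thread's stack bottom (P1) and size, as
    // current_stack_region() does for non-primordial threads.
    static void demo_stack_region(void** bottom, size_t* size) {
      pthread_attr_t attr;
      if (pthread_getattr_np(pthread_self(), &attr) != 0) std::abort();  // GNU extension
      if (pthread_attr_getstack(&attr, bottom, size) != 0) std::abort(); // *bottom == P1, top == P1 + *size
      pthread_attr_destroy(&attr);
    }

    int main() {
      void* bottom = nullptr;
      size_t size = 0;
      demo_stack_region(&bottom, &size);
      std::printf("stack bottom=%p size=%zu\n", bottom, size);
      return 0;
    }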
+ *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + +void os::print_context(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + for (int r = 0; r < 31; r++) + st->print_cr( "R%d=" INTPTR_FORMAT, r, (int64_t)uc->uc_mcontext.regs[r]); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+ address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 32, pc + 32, sizeof(char)); +} + +void os::print_register_info(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + + // this is horrendously verbose but the layout of the registers in the + // context does not match how we defined our abstract Register set, so + // we can't just iterate through the gregs area + + // this is only for the "general purpose" registers + + for (int r = 0; r < 31; r++) + st->print_cr( "R%d=" INTPTR_FORMAT, r, (int64_t)uc->uc_mcontext.regs[r]); + st->cr(); +} + +void os::setup_fpu() { +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +extern "C" { + int SpinPause() { + return 0; + } + + void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + if (from > to) { + const jshort *end = from + count; + while (from < end) + *(to++) = *(from++); + } + else if (from < to) { + const jshort *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + *(to--) = *(from--); + } + } + void _Copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + if (from > to) { + const jint *end = from + count; + while (from < end) + *(to++) = *(from++); + } + else if (from < to) { + const jint *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + *(to--) = *(from--); + } + } + void _Copy_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + if (from > to) { + const jlong *end = from + count; + while (from < end) + os::atomic_copy64(from++, to++); + } + else if (from < to) { + const jlong *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + os::atomic_copy64(from--, to--); + } + } + + void _Copy_arrayof_conjoint_bytes(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count); + } + void _Copy_arrayof_conjoint_jshorts(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 2); + } + void _Copy_arrayof_conjoint_jints(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 4); + } + void _Copy_arrayof_conjoint_jlongs(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 8); + } +}; --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.hpp 2021-01-25 19:32:09.340819158 +0000 @@ -0,0 +1,58 @@ +/* + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP +#define OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP + + static void setup_fpu(); + static bool supports_sse(); + + static jlong rdtsc(); + + static bool is_allocatable(size_t bytes); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + // Atomically copy 64 bits of data + static void atomic_copy64(volatile void *src, volatile void *dst) { +#if defined(PPC) && !defined(_LP64) + double tmp; + asm volatile ("lfd %0, 0(%1)\n" + "stfd %0, 0(%2)\n" + : "=f"(tmp) + : "b"(src), "b"(dst)); +#elif defined(S390) && !defined(_LP64) + double tmp; + asm volatile ("ld %0, 0(%1)\n" + "std %0, 0(%2)\n" + : "=r"(tmp) + : "a"(src), "a"(dst)); +#else + *(jlong *) dst = *(jlong *) src; +#endif + } + +#endif // OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.inline.hpp 2021-01-25 19:32:09.784823825 +0000 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP + +#include "runtime/os.hpp" + +// See http://www.technovelty.org/code/c/reading-rdtsc.htl for details +inline jlong os::rdtsc() { + uint64_t res; + uint32_t ts1, ts2; + __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2)); + res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); + return (jlong)res; +} + +#endif // OS_CPU_LINUX_AARCH64_VM_OS_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/prefetch_linux_aarch64.inline.hpp 2021-01-25 19:32:10.224828449 +0000 @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP +#define OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP + +#include "runtime/prefetch.hpp" + + +inline void Prefetch::read (void *loc, intx interval) { + if (interval >= 0) + asm("prfm PLDL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval)); +} + +inline void Prefetch::write(void *loc, intx interval) { + if (interval >= 0) + asm("prfm PSTL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval)); +} + +#endif // OS_CPU_LINUX_AARCH64_VM_PREFETCH_LINUX_AARCH64_INLINE_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp 2021-01-25 19:32:10.685833294 +0000 @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/threadLocalStorage.hpp" +#include "runtime/thread.inline.hpp" + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { +} + +__thread Thread *aarch64_currentThread; + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); + aarch64_currentThread = thread; +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp 2021-01-25 19:32:11.142838098 +0000 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP +#define OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP + + // Processor dependent parts of ThreadLocalStorage + +public: + + static Thread *thread() { + return aarch64_currentThread; + } + +#endif // OS_CPU_LINUX_AARCH64_VM_THREADLS_LINUX_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/thread_linux_aarch64.cpp 2021-01-25 19:32:11.575842649 +0000 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. 
+ if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 uses ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/thread_linux_aarch64.hpp 2021-01-25 19:32:12.014847263 +0000 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP +#define OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP + + private: +#ifdef ASSERT + // spill stack holds N callee-save registers at each Java call and + // grows downwards towards limit + // we need limit to check we have space for a spill and base so we + // can identify all live spill frames at GC (eventually) + address _spill_stack; + address _spill_stack_base; + address _spill_stack_limit; +#endif // ASSERT + + void pd_initialize() { + _anchor.clear(); + } + + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } + + public: + // Mutators are highly dangerous.... 
+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_AARCH64_VM_THREAD_LINUX_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/vmStructs_linux_aarch64.hpp 2021-01-25 19:32:12.453851877 +0000 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP +#define OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
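The macros below do not declare anything by themselves; vmStructs.cpp expands each list with different "callback" macros to build the Serviceability Agent's tables of field offsets and types. A minimal sketch of that X-macro pattern, using a hypothetical stand-in struct rather than the real OSThread:

    #include <cstddef>
    #include <cstdio>

    struct OSThreadDemo { int _thread_id; unsigned long _pthread_id; };  // hypothetical stand-in

    // The port supplies one list of (type, field) pairs ...
    #define DEMO_STRUCTS(nonstatic_field)          \
      nonstatic_field(OSThreadDemo, _thread_id)    \
      nonstatic_field(OSThreadDemo, _pthread_id)

    // ... and each expansion of the callback turns that list into code or data.
    #define PRINT_OFFSET(klass, field) \
      std::printf(#klass "::" #field " offset = %zu\n", offsetof(klass, field));

    int main() {
      DEMO_STRUCTS(PRINT_OFFSET)   // one printf per listed field
      return 0;
    }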
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(OSThread::thread_id_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_AARCH64_VM_VMSTRUCTS_LINUX_AARCH64_HPP --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/src/os_cpu/linux_aarch64/vm/vm_version_linux_aarch64.cpp 2021-01-25 19:32:12.900856575 +0000 @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_aarch64.hpp" + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/DoubleArithTests.java 2021-01-25 19:32:13.326861053 +0000 @@ -0,0 +1,49 @@ +public class DoubleArithTests { + + private static double test_neg(double a) { + return -a; + } + + private static double test_add(double a, double b) { + return a + b; + } + + private static double test_sub(double a, double b) { + return a - b; + } + + private static double test_mul(double a, double b) { + return a * b; + } + + private static double test_div(double a, double b) { + return a / b; + } + + private static double test_rem(double a, double b) { + return a % b; + } + + private static void assertThat(boolean assertion) { + if (! 
assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_neg(10.0) == -10.0); + assertThat(test_add(3.0, 2.0) == 5.0); + + assertThat(test_sub(40.0, 13.0) == 27.0); + + assertThat(test_mul(5.0, 200.0) == 1000.0); + + assertThat(test_div(30.0, 3.0) == 10.0); + assertThat(test_div(30.0, 0.0) == Double.POSITIVE_INFINITY); + + assertThat(test_rem(30.0, 3.0) == 0.0); + assertThat(test_rem(29.0, 3.0) == 2.0); + assertThat(Double.isNaN(test_rem(30.0, 0.0))); + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/DoubleCmpTests.java 2021-01-25 19:32:13.799866024 +0000 @@ -0,0 +1,102 @@ + +public class DoubleCmpTests { + + private static boolean test_isEq(double a, double b) { + return a == b; + } + + private static boolean test_isNe(double a, double b) { + return a != b; + } + + private static boolean test_isLt(double a, double b) { + return a < b; + } + + private static boolean test_isLe(double a, double b) { + return a <= b; + } + + private static boolean test_isGe(double a, double b) { + return a >= b; + } + + private static boolean test_isGt(double a, double b) { + return a > b; + } + + private static boolean test_isEqC(double a) { + return a == 7.; + } + + private static boolean test_isNeC(double a) { + return a != 7.; + } + + private static boolean test_isLtC(double a) { + return a < 7.; + } + + private static boolean test_isLeC(double a) { + return a <= 7.; + } + + private static boolean test_isGeC(double a) { + return a >= 7.; + } + + private static boolean test_isGtC(double a) { + return a > 7.; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_isEq(7., 7.)); + assertThat(! test_isEq(70., 7.)); + assertThat(! test_isNe(7., 7.)); + assertThat(test_isNe(70., 7.)); + + assertThat(test_isLt(7., 70.)); + assertThat(! test_isLt(70., 7.)); + assertThat(! test_isLt(7., 7.)); + + assertThat(test_isLe(7., 70.)); + assertThat(! test_isLe(70., 7.)); + assertThat(test_isLe(7., 7.)); + + assertThat(!test_isGe(7., 70.)); + assertThat(test_isGe(70., 7.)); + assertThat(test_isGe(7., 7.)); + + assertThat(!test_isGt(7., 70.)); + assertThat(test_isGt(70., 7.)); + assertThat(! test_isGt(7., 7.)); + + + assertThat(test_isEqC(7.)); + assertThat(! test_isEqC(70.)); + assertThat(! test_isNeC(7.)); + assertThat(test_isNeC(70.)); + + assertThat(test_isLtC(6.)); + assertThat(! test_isLtC(70.)); + assertThat(! test_isLtC(7.)); + + assertThat(test_isLeC(6.)); + assertThat(! test_isLeC(70.)); + assertThat(test_isLeC(7.)); + + assertThat(!test_isGeC(6.)); + assertThat(test_isGeC(70.)); + assertThat(test_isGeC(7.)); + + assertThat(!test_isGtC(6.)); + assertThat(test_isGtC(70.)); + assertThat(! test_isGtC(7.)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/FloatArithTests.java 2021-01-25 19:32:14.235870607 +0000 @@ -0,0 +1,49 @@ +public class FloatArithTests { + + private static float test_neg(float a) { + return -a; + } + + private static float test_add(float a, float b) { + return a + b; + } + + private static float test_sub(float a, float b) { + return a - b; + } + + private static float test_mul(float a, float b) { + return a * b; + } + + private static float test_div(float a, float b) { + return a / b; + } + + private static float test_rem(float a, float b) { + return a % b; + } + + private static void assertThat(boolean assertion) { + if (! 
assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_neg(10F) == -10F); + assertThat(test_add(3F, 2F) == 5F); + + assertThat(test_sub(40F, 13F) == 27F); + + assertThat(test_mul(5F, 200F) == 1000F); + + assertThat(test_div(30F, 3F) == 10F); + assertThat(test_div(30, 0) == Float.POSITIVE_INFINITY); + + assertThat(test_rem(30F, 3F) == 0); + assertThat(test_rem(29F, 3F) == 2F); + assertThat(Float.isNaN(test_rem(30F, 0F))); + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/FloatCmpTests.java 2021-01-25 19:32:14.666875137 +0000 @@ -0,0 +1,102 @@ + +public class FloatCmpTests { + + private static boolean test_isEq(float a, float b) { + return a == b; + } + + private static boolean test_isNe(float a, float b) { + return a != b; + } + + private static boolean test_isLt(float a, float b) { + return a < b; + } + + private static boolean test_isLe(float a, float b) { + return a <= b; + } + + private static boolean test_isGe(float a, float b) { + return a >= b; + } + + private static boolean test_isGt(float a, float b) { + return a > b; + } + + private static boolean test_isEqC(float a) { + return a == 7F; + } + + private static boolean test_isNeC(float a) { + return a != 7F; + } + + private static boolean test_isLtC(float a) { + return a < 7F; + } + + private static boolean test_isLeC(float a) { + return a <= 7F; + } + + private static boolean test_isGeC(float a) { + return a >= 7F; + } + + private static boolean test_isGtC(float a) { + return a > 7F; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_isEq(7F, 7F)); + assertThat(! test_isEq(70F, 7F)); + assertThat(! test_isNe(7F, 7F)); + assertThat(test_isNe(70F, 7F)); + + assertThat(test_isLt(7F, 70F)); + assertThat(! test_isLt(70F, 7F)); + assertThat(! test_isLt(7F, 7F)); + + assertThat(test_isLe(7F, 70F)); + assertThat(! test_isLe(70F, 7F)); + assertThat(test_isLe(7F, 7F)); + + assertThat(!test_isGe(7F, 70F)); + assertThat(test_isGe(70F, 7F)); + assertThat(test_isGe(7F, 7F)); + + assertThat(!test_isGt(7F, 70F)); + assertThat(test_isGt(70F, 7F)); + assertThat(! test_isGt(7F, 7F)); + + + assertThat(test_isEqC(7F)); + assertThat(! test_isEqC(70F)); + assertThat(! test_isNeC(7F)); + assertThat(test_isNeC(70F)); + + assertThat(test_isLtC(6F)); + assertThat(! test_isLtC(70F)); + assertThat(! test_isLtC(7F)); + + assertThat(test_isLeC(6F)); + assertThat(! test_isLeC(70F)); + assertThat(test_isLeC(7F)); + + assertThat(!test_isGeC(6F)); + assertThat(test_isGeC(70F)); + assertThat(test_isGeC(7F)); + + assertThat(!test_isGtC(6F)); + assertThat(test_isGtC(70F)); + assertThat(! 
test_isGtC(7F)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/IntArithTests.java 2021-01-25 19:32:15.079879478 +0000 @@ -0,0 +1,131 @@ +public class IntArithTests { + + private static final int IIMM12_0 = 0x1; // first imm value + private static final int IIMM12_1 = 0xfff; // last 12bit imm value + private static final int IIMM12_2 = 0x1001; // Should not encode as imm + private static final int IIMM24_3 = 0x1000; // first 12 bit shifted imm + private static final int IIMM24_4 = 0xfff000; // Last 12 bit shifted imm + private static final int IIMM24_5 = 0x1001000; // Should not encode as imm + + private static int test_neg(int a) { + return -a; + } + + private static int test_addi(int a, int b) { + return a + b; + } + + private static int test_addic0(int a) { + return a + IIMM12_0; + } + + private static int test_addic1(int a) { + return a + IIMM12_1; + } + + private static int test_addic2(int a) { + return a + IIMM12_2; + } + + private static int test_addic3(int a) { + return a + IIMM24_3; + } + + private static int test_addic4(int a) { + return a + IIMM24_4; + } + + private static int test_addic5(int a) { + return a + IIMM24_5; + } + + private static int test_subi(int a, int b) { + return a - b; + } + + private static int test_subc1(int a) { + return a - 11; + } + + private static int test_mulic1(int a) { + // Generates shl. + return a * 8; + } + + private static int test_mulic2(int a) { + // Generates shl followed by add. + return a * 9; + } + + private static int test_mulic3(int a) { + // Generates shl followed by sub. + return a * 7; + } + + private static int test_mulic4(int a) { + // Generates normal mul. + return a * 10; + } + + private static int test_muli(int a, int b) { + // Generates normal mul. + return a * b; + } + + private static int test_divi(int a, int b) { + return a / b; + } + + private static int test_remi(int a, int b) { + return a % b; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_neg(10) == -10); + assertThat(test_addi(3, 2) == 5); + assertThat(test_addi(Integer.MAX_VALUE, 1) == Integer.MIN_VALUE); + assertThat(test_addic0(3) == 4); + assertThat(test_addic1(3) == 0x1002); + assertThat(test_addic2(3) == 0x1004); + assertThat(test_addic3(3) == 0x1003); + assertThat(test_addic4(3) == 0xfff003); + assertThat(test_addic5(3) == 0x1001003); + + assertThat(test_subi(40, 13) == 27); + assertThat(test_subi(Integer.MIN_VALUE, 1) == Integer.MAX_VALUE); + assertThat(test_subc1(40) == 29); + + assertThat(test_mulic1(5) == 40); + assertThat(test_mulic2(5) == 45); + assertThat(test_mulic3(5) == 35); + assertThat(test_mulic4(5) == 50); + assertThat(test_muli(5, 200) == 1000); + + assertThat(test_divi(30, 3) == 10); + assertThat(test_divi(29, 3) == 9); + assertThat(test_divi(Integer.MIN_VALUE, -1) == Integer.MIN_VALUE); + try { + test_divi(30, 0); + throw new AssertionError(); + } catch (ArithmeticException ex) { + // Pass. + } + + assertThat(test_remi(30, 3) == 0); + assertThat(test_remi(29, 3) == 2); + assertThat(test_remi(Integer.MIN_VALUE, -1) == 0); + try { + test_remi(30, 0); + throw new AssertionError(); + } catch (ArithmeticException ex) { + // Pass. 
+ } + } +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/IntCmpTests.java 2021-01-25 19:32:15.511884018 +0000 @@ -0,0 +1,102 @@ + +public class IntCmpTests { + + private static boolean test_isEq(int a, int b) { + return a == b; + } + + private static boolean test_isNe(int a, int b) { + return a != b; + } + + private static boolean test_isLt(int a, int b) { + return a < b; + } + + private static boolean test_isLe(int a, int b) { + return a <= b; + } + + private static boolean test_isGe(int a, int b) { + return a >= b; + } + + private static boolean test_isGt(int a, int b) { + return a > b; + } + + private static boolean test_isEqC(int a) { + return a == 7; + } + + private static boolean test_isNeC(int a) { + return a != 7; + } + + private static boolean test_isLtC(int a) { + return a < 7; + } + + private static boolean test_isLeC(int a) { + return a <= 7; + } + + private static boolean test_isGeC(int a) { + return a >= 7; + } + + private static boolean test_isGtC(int a) { + return a > 7; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_isEq(7, 7)); + assertThat(! test_isEq(70, 7)); + assertThat(! test_isNe(7, 7)); + assertThat(test_isNe(70, 7)); + + assertThat(test_isLt(7, 70)); + assertThat(! test_isLt(70, 7)); + assertThat(! test_isLt(7, 7)); + + assertThat(test_isLe(7, 70)); + assertThat(! test_isLe(70, 7)); + assertThat(test_isLe(7, 7)); + + assertThat(!test_isGe(7, 70)); + assertThat(test_isGe(70, 7)); + assertThat(test_isGe(7, 7)); + + assertThat(!test_isGt(7, 70)); + assertThat(test_isGt(70, 7)); + assertThat(! test_isGt(7, 7)); + + assertThat(test_isEqC(7)); + assertThat(! test_isEqC(70)); + assertThat(! test_isNeC(7)); + assertThat(test_isNeC(70)); + + assertThat(test_isLtC(6)); + assertThat(! test_isLtC(70)); + assertThat(! test_isLtC(7)); + + assertThat(test_isLeC(6)); + assertThat(! test_isLeC(70)); + assertThat(test_isLeC(7)); + + assertThat(!test_isGeC(6)); + assertThat(test_isGeC(70)); + assertThat(test_isGeC(7)); + + assertThat(!test_isGtC(6)); + assertThat(test_isGtC(70)); + assertThat(! test_isGtC(7)); + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/IntLogicTests.java 2021-01-25 19:32:15.973888874 +0000 @@ -0,0 +1,66 @@ +public class IntLogicTests { + + private static int test_and(int a, int b) { + return a & b; + } + + private static int test_andc1(int a) { + // Generates immediate instruction. + return a & 0xf0f0f0f0; + } + + private static int test_andc2(int a) { + // Generates non-immediate instruction. + return a & 0x123456d5; + } + + private static int test_or(int a, int b) { + return a | b; + } + + private static int test_orc1(int a) { + // Generates immediate instruction. + return a | 0xf0f0f0f0; + } + + private static int test_orc2(int a) { + // Generates non-immediate instruction. + return a | 0x123456d5; + } + + private static int test_xor(int a, int b) { + return a ^ b; + } + + private static int test_xorc1(int a) { + // Generates immediate instruction. + return a ^ 0xf0f0f0f0; + } + + private static int test_xorc2(int a) { + // Generates non-immediate instruction. + return a ^ 0x123456d5; + } + + private static void assertThat(boolean assertion) { + if (! 
assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + + assertThat(test_and(0x21, 0x31) == 0x21); + assertThat(test_andc1(0xaaaaaaaa) == 0xa0a0a0a0); + assertThat(test_andc2(0xaaaaaaaa) == 0x02200280); + + assertThat(test_or(0x21, 0x31) == 0x31); + assertThat(test_orc1(0xaaaaaaaa) == 0xfafafafa); + assertThat(test_orc2(0xaaaaaaaa) == 0xbabefeff); + + assertThat(test_xor(0x21, 0x31) == 16); + assertThat(test_xorc1(0xaaaaaaaa) == 0x5a5a5a5a); + assertThat(test_xorc2(0xaaaaaaaa) == 0xb89efc7f); + } +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/IntShiftTests.java 2021-01-25 19:32:16.394893299 +0000 @@ -0,0 +1,78 @@ +public class IntShiftTests { + + private static int test_shl(int a, int b) { + return a << b; + } + + private static int test_shlc1(int a) { + return a << 1; + } + + private static int test_shlc33(int a) { + return a << 33; + } + + private static int test_shr(int a, int b) { + return a >> b; + } + + private static int test_shrc1(int a) { + return a >> 1; + } + + private static int test_shrc33(int a) { + return a >> 33; + } + + private static int test_ushr(int a, int b) { + return a >>> b; + } + + private static int test_ushrc1(int a) { + return a >>> 1; + } + + private static int test_ushrc33(int a) { + return a >>> 33; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + + assertThat(test_shl(32, 2) == 128); + assertThat(test_shl(0x80000000, 1) == 0); + assertThat(test_shl(0x40000000, 1) == 0x80000000); + assertThat(test_shl(0x40000000, 33) == 0x80000000); + + assertThat(test_shr(32, 2) == 8); + assertThat(test_shr(1, 1) == 0); + assertThat(test_shr(0x80000000, 1) == 0xc0000000); + assertThat(test_shr(0x40000000, 33) == 0x20000000); + + assertThat(test_ushr(32, 2) == 8); + assertThat(test_ushr(1, 1) == 0); + assertThat(test_ushr(0x80000000, 1) == 0x40000000); + assertThat(test_ushr(0x40000000, 33) == 0x20000000); + + assertThat(test_shlc1(32) == 64); + assertThat(test_shlc1(0x80000000) == 0); + assertThat(test_shlc1(0x40000000) == 0x80000000); + assertThat(test_shlc33(0x40000000) == 0x80000000); + + assertThat(test_shrc1(32) == 16); + assertThat(test_shrc1(1) == 0); + assertThat(test_shrc1(0x80000000) == 0xc0000000); + assertThat(test_shrc33(0x40000000) == 0x20000000); + + assertThat(test_ushrc1(32) == 16); + assertThat(test_ushrc1(1) == 0); + assertThat(test_ushrc1(0x80000000) == 0x40000000); + assertThat(test_ushrc33(0x40000000) == 0x20000000); + } +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/LongArithTests.java 2021-01-25 19:32:16.844898029 +0000 @@ -0,0 +1,132 @@ +public class LongArithTests { + + + private static final long IIMM12_0 = 0x1; // first imm value + private static final long IIMM12_1 = 0xfff; // last 12bit imm value + private static final long IIMM12_2 = 0x1001; // Should not encode as imm + private static final long IIMM24_3 = 0x1000; // first 12 bit shifted imm + private static final long IIMM24_4 = 0xfff000; // Last 12 bit shifted imm + private static final long IIMM24_5 = 0x1001000; // Should not encode as imm + + private static long test_neg(long a) { + return -a; + } + + private static long test_add(long a, long b) { + return a + b; + } + + private static long test_addc0(long a) { + return a + IIMM12_0; + } + + private static long test_addc1(long a) { + return a + IIMM12_1; + } + + private static long test_addc2(long a) { + return a + IIMM12_2; 
+ } + + private static long test_addc3(long a) { + return a + IIMM24_3; + } + + private static long test_addc4(long a) { + return a + IIMM24_4; + } + + private static long test_addc5(long a) { + return a + IIMM24_5; + } + + private static long test_sub(long a, long b) { + return a - b; + } + + private static long test_subc1(long a) { + return a - 11; + } + + private static long test_mulc1(long a) { + // Generates shl. + return a * 8; + } + + private static long test_mulc2(long a) { + // Generates shl followed by add. + return a * 9; + } + + private static long test_mulc3(long a) { + // Generates shl followed by sub. + return a * 7; + } + + private static long test_mulc4(long a) { + // Generates normal mul. + return a * 10; + } + + private static long test_mul(long a, long b) { + // Generates normal mul. + return a * b; + } + + private static long test_div(long a, long b) { + return a / b; + } + + private static long test_rem(long a, long b) { + return a % b; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_neg(10) == -10); + assertThat(test_add(3, 2) == 5); + assertThat(test_add(Long.MAX_VALUE, 1) == Long.MIN_VALUE); + assertThat(test_addc0(3) == 4); + assertThat(test_addc1(3) == 0x1002); + assertThat(test_addc2(3) == 0x1004); + assertThat(test_addc3(3) == 0x1003); + assertThat(test_addc4(3) == 0xfff003); + assertThat(test_addc5(3) == 0x1001003); + + assertThat(test_sub(40, 13) == 27); + assertThat(test_sub(Long.MIN_VALUE, 1) == Long.MAX_VALUE); + assertThat(test_subc1(40) == 29); + + assertThat(test_mulc1(5) == 40); + assertThat(test_mulc2(5) == 45); + assertThat(test_mulc3(5) == 35); + assertThat(test_mulc4(5) == 50); + assertThat(test_mul(5, 200) == 1000); + + assertThat(test_div(30, 3) == 10); + assertThat(test_div(29, 3) == 9); + assertThat(test_div(Long.MIN_VALUE, -1) == Long.MIN_VALUE); + try { + test_div(30, 0); + throw new AssertionError(); + } catch (ArithmeticException ex) { + // Pass. + } + + assertThat(test_rem(30, 3) == 0); + assertThat(test_rem(29, 3) == 2); + assertThat(test_rem(Long.MIN_VALUE, -1) == 0); + try { + test_rem(30, 0); + throw new AssertionError(); + } catch (ArithmeticException ex) { + // Pass. + } + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/LongCmpTests.java 2021-01-25 19:32:17.267902475 +0000 @@ -0,0 +1,102 @@ + +public class LongCmpTests { + + private static boolean test_isEq(long a, long b) { + return a == b; + } + + private static boolean test_isNe(long a, long b) { + return a != b; + } + + private static boolean test_isLt(long a, long b) { + return a < b; + } + + private static boolean test_isLe(long a, long b) { + return a <= b; + } + + private static boolean test_isGe(long a, long b) { + return a >= b; + } + + private static boolean test_isGt(long a, long b) { + return a > b; + } + + private static boolean test_isEqC(long a) { + return a == 7L; + } + + private static boolean test_isNeC(long a) { + return a != 7L; + } + + private static boolean test_isLtC(long a) { + return a < 7L; + } + + private static boolean test_isLeC(long a) { + return a <= 7L; + } + + private static boolean test_isGeC(long a) { + return a >= 7L; + } + + private static boolean test_isGtC(long a) { + return a > 7L; + } + + private static void assertThat(boolean assertion) { + if (! 
assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + assertThat(test_isEq(7L, 7L)); + assertThat(! test_isEq(70L, 7L)); + assertThat(! test_isNe(7L, 7L)); + assertThat(test_isNe(70L, 7L)); + + assertThat(test_isLt(7L, 70L)); + assertThat(! test_isLt(70L, 7L)); + assertThat(! test_isLt(7L, 7L)); + + assertThat(test_isLe(7L, 70L)); + assertThat(! test_isLe(70L, 7L)); + assertThat(test_isLe(7L, 7L)); + + assertThat(!test_isGe(7L, 70L)); + assertThat(test_isGe(70L, 7L)); + assertThat(test_isGe(7L, 7L)); + + assertThat(!test_isGt(7L, 70L)); + assertThat(test_isGt(70L, 7L)); + assertThat(! test_isGt(7L, 7L)); + + assertThat(test_isEqC(7L)); + assertThat(! test_isEqC(70L)); + assertThat(! test_isNeC(7L)); + assertThat(test_isNeC(70L)); + + assertThat(test_isLtC(6L)); + assertThat(! test_isLtC(70L)); + assertThat(! test_isLtC(7L)); + + assertThat(test_isLeC(6L)); + assertThat(! test_isLeC(70L)); + assertThat(test_isLeC(7L)); + + assertThat(!test_isGeC(6L)); + assertThat(test_isGeC(70L)); + assertThat(test_isGeC(7L)); + + assertThat(!test_isGtC(6L)); + assertThat(test_isGtC(70L)); + assertThat(! test_isGtC(7L)); + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/LongLogicTests.java 2021-01-25 19:32:17.696906984 +0000 @@ -0,0 +1,68 @@ +public class LongLogicTests { + + private static final long IMM = 0xf0f0f0f0f0f0f0f0L; + private static final long NO_IMM = 0x123456d5123456d5L; + private static long test_and(long a, long b) { + return a & b; + } + + private static long test_andc1(long a) { + // Generates immediate instruction. + return a & IMM; + } + + private static long test_andc2(long a) { + // Generates non-immediate instruction. + return a & NO_IMM; + } + + private static long test_or(long a, long b) { + return a | b; + } + + private static long test_orc1(long a) { + // Generates immediate instruction. + return a | IMM; + } + + private static long test_orc2(long a) { + // Generates non-immediate instruction. + return a | NO_IMM; + } + + private static long test_xor(long a, long b) { + return a ^ b; + } + + private static long test_xorc1(long a) { + // Generates immediate instruction. + return a ^ IMM; + } + + private static long test_xorc2(long a) { + // Generates non-immediate instruction. + return a ^ NO_IMM; + } + + private static void assertThat(boolean assertion) { + if (! 
assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + + assertThat(test_and(0x21, 0x31) == 0x21); + assertThat(test_andc1(0xaaaaaaaaaaaaaaaaL) == 0xa0a0a0a0a0a0a0a0L); + assertThat(test_andc2(0xaaaaaaaaaaaaaaaaL) == 0x0220028002200280L); + + assertThat(test_or(0x21, 0x31) == 0x31); + assertThat(test_orc1(0xaaaaaaaaaaaaaaaaL) == 0xfafafafafafafafaL); + assertThat(test_orc2(0xaaaaaaaaaaaaaaaaL) == 0xbabefeffbabefeffL); + + assertThat(test_xor(0x21, 0x31) == 16); + assertThat(test_xorc1(0xaaaaaaaaaaaaaaaaL) == 0x5a5a5a5a5a5a5a5aL); + assertThat(test_xorc2(0xaaaaaaaaaaaaaaaaL) == 0xb89efc7fb89efc7fL); + } +} + --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/aarch64/LongShiftTests.java 2021-01-25 19:32:18.110911335 +0000 @@ -0,0 +1,77 @@ +public class LongShiftTests { + + private static long test_shl(long a, long b) { + return a << b; + } + + private static long test_shlc1(long a) { + return a << 1; + } + + private static long test_shlc65(long a) { + return a << 65; + } + + private static long test_shr(long a, long b) { + return a >> b; + } + + private static long test_shrc1(long a) { + return a >> 1; + } + + private static long test_shrc65(long a) { + return a >> 65; + } + + private static long test_ushr(long a, long b) { + return a >>> b; + } + + private static long test_ushrc1(long a) { + return a >>> 1; + } + + private static long test_ushrc65(long a) { + return a >>> 65; + } + + private static void assertThat(boolean assertion) { + if (! assertion) { + throw new AssertionError(); + } + } + + public static void main(String[] args) { + + assertThat(test_shl(32, 2) == 128); + assertThat(test_shl(0x8000000000000000L, 1) == 0); + assertThat(test_shl(0x4000000000000000L, 1) == 0x8000000000000000L); + assertThat(test_shl(0x4000000000000000L, 65) == 0x8000000000000000L); + + assertThat(test_shr(32, 2) == 8); + assertThat(test_shr(1, 1) == 0); + assertThat(test_shr(0x8000000000000000L, 1) == 0xc000000000000000L); + assertThat(test_shr(0x4000000000000000L, 65) == 0x2000000000000000L); + + assertThat(test_ushr(32, 2) == 8); + assertThat(test_ushr(1, 1) == 0); + assertThat(test_ushr(0x8000000000000000L, 1) == 0x4000000000000000L); + assertThat(test_ushr(0x4000000000000000L, 65) == 0x2000000000000000L); + + assertThat(test_shlc1(32) == 64); + assertThat(test_shlc1(0x8000000000000000L) == 0); + assertThat(test_shlc1(0x4000000000000000L) == 0x8000000000000000L); + assertThat(test_shlc65(0x4000000000000000L) == 0x8000000000000000L); + + assertThat(test_shrc1(32) == 16); + assertThat(test_shrc1(1) == 0); + assertThat(test_shrc1(0x8000000000000000L) == 0xc000000000000000L); + assertThat(test_shrc65(0x4000000000000000L) == 0x2000000000000000L); + + assertThat(test_ushrc1(32) == 16); + assertThat(test_ushrc1(1) == 0); + assertThat(test_ushrc1(0x8000000000000000L) == 0x4000000000000000L); + assertThat(test_ushrc65(0x4000000000000000L) == 0x2000000000000000L); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/c2/cr6340864/TestByteVect.java 2021-01-25 19:32:18.555916012 +0000 @@ -0,0 +1,1491 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestByteVect + */ + +package compiler.c2.cr6340864; + +public class TestByteVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = 63; + private static final int BIT_MASK = 0xB7; + private static final int VALUE = 3; + private static final int SHIFT = 8; + + public static void main(String args[]) { + System.out.println("Testing Byte vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a0 = new byte[ARRLEN]; + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + byte[] a3 = new byte[ARRLEN]; + byte[] a4 = new byte[ARRLEN]; + short[] p2 = new short[ARRLEN/2]; + int[] p4 = new int[ARRLEN/4]; + long[] p8 = new long[ARRLEN/8]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + 
+ test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>b); + } + } + static void test_srlc_add(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>VALUE); + } + } + static void test_srac_n(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>SHIFT); + } + } + static void test_srac_on(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>b); + } + } + static void test_srac_add(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & b)>>VALUE); + } + } + + static void test_pack2(short[] p2, byte[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l0 = (short)a1[i*2+0]; + short l1 = (short)a1[i*2+1]; + p2[i] = (short)((l1 << 8) | (l0 & 0xFF)); + } + } + static void test_unpack2(byte[] a0, short[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l = p2[i]; + a0[i*2+0] = (byte)(l & 0xFF); + a0[i*2+1] = (byte)(l >> 8); + } + } + static void test_pack2_swap(short[] p2, byte[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l0 = (short)a1[i*2+0]; + short l1 = (short)a1[i*2+1]; + p2[i] = (short)((l0 << 8) | (l1 & 0xFF)); + } + } + static void test_unpack2_swap(byte[] a0, short[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l = p2[i]; + a0[i*2+0] = (byte)(l >> 8); + a0[i*2+1] = (byte)(l & 0xFF); + } + } + + static void test_pack4(int[] p4, byte[] a1) { + if 
(p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l0 = (int)a1[i*4+0]; + int l1 = (int)a1[i*4+1]; + int l2 = (int)a1[i*4+2]; + int l3 = (int)a1[i*4+3]; + p4[i] = (l0 & 0xFF) | + ((l1 & 0xFF) << 8) | + ((l2 & 0xFF) << 16) | + ((l3 & 0xFF) << 24); + } + } + static void test_unpack4(byte[] a0, int[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l = p4[i]; + a0[i*4+0] = (byte)(l & 0xFF); + a0[i*4+1] = (byte)(l >> 8); + a0[i*4+2] = (byte)(l >> 16); + a0[i*4+3] = (byte)(l >> 24); + } + } + static void test_pack4_swap(int[] p4, byte[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l0 = (int)a1[i*4+0]; + int l1 = (int)a1[i*4+1]; + int l2 = (int)a1[i*4+2]; + int l3 = (int)a1[i*4+3]; + p4[i] = (l3 & 0xFF) | + ((l2 & 0xFF) << 8) | + ((l1 & 0xFF) << 16) | + ((l0 & 0xFF) << 24); + } + } + static void test_unpack4_swap(byte[] a0, int[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l = p4[i]; + a0[i*4+0] = (byte)(l >> 24); + a0[i*4+1] = (byte)(l >> 16); + a0[i*4+2] = (byte)(l >> 8); + a0[i*4+3] = (byte)(l & 0xFF); + } + } + + static void test_pack8(long[] p8, byte[] a1) { + if (p8.length*8 > a1.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l0 = (long)a1[i*8+0]; + long l1 = (long)a1[i*8+1]; + long l2 = (long)a1[i*8+2]; + long l3 = (long)a1[i*8+3]; + long l4 = (long)a1[i*8+4]; + long l5 = (long)a1[i*8+5]; + long l6 = (long)a1[i*8+6]; + long l7 = (long)a1[i*8+7]; + p8[i] = (l0 & 0xFFl) | + ((l1 & 0xFFl) << 8) | + ((l2 & 0xFFl) << 16) | + ((l3 & 0xFFl) << 24) | + ((l4 & 0xFFl) << 32) | + ((l5 & 0xFFl) << 40) | + ((l6 & 0xFFl) << 48) | + ((l7 & 0xFFl) << 56); + } + } + static void test_unpack8(byte[] a0, long[] p8) { + if (p8.length*8 > a0.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l = p8[i]; + a0[i*8+0] = (byte)(l & 0xFFl); + a0[i*8+1] = (byte)(l >> 8); + a0[i*8+2] = (byte)(l >> 16); + a0[i*8+3] = (byte)(l >> 24); + a0[i*8+4] = (byte)(l >> 32); + a0[i*8+5] = (byte)(l >> 40); + a0[i*8+6] = (byte)(l >> 48); + a0[i*8+7] = (byte)(l >> 56); + } + } + static void test_pack8_swap(long[] p8, byte[] a1) { + if (p8.length*8 > a1.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l0 = (long)a1[i*8+0]; + long l1 = (long)a1[i*8+1]; + long l2 = (long)a1[i*8+2]; + long l3 = (long)a1[i*8+3]; + long l4 = (long)a1[i*8+4]; + long l5 = (long)a1[i*8+5]; + long l6 = (long)a1[i*8+6]; + long l7 = (long)a1[i*8+7]; + p8[i] = (l7 & 0xFFl) | + ((l6 & 0xFFl) << 8) | + ((l5 & 0xFFl) << 16) | + ((l4 & 0xFFl) << 24) | + ((l3 & 0xFFl) << 32) | + ((l2 & 0xFFl) << 40) | + ((l1 & 0xFFl) << 48) | + ((l0 & 0xFFl) << 56); + } + } + static void test_unpack8_swap(byte[] a0, long[] p8) { + if (p8.length*8 > a0.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l = p8[i]; + a0[i*8+0] = (byte)(l >> 56); + a0[i*8+1] = (byte)(l >> 48); + a0[i*8+2] = (byte)(l >> 40); + a0[i*8+3] = (byte)(l >> 32); + a0[i*8+4] = (byte)(l >> 24); + a0[i*8+5] = (byte)(l >> 16); + a0[i*8+6] = (byte)(l >> 8); + a0[i*8+7] = (byte)(l & 0xFFl); + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String 
text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val)); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/c2/cr6340864/TestDoubleVect.java 2021-01-25 19:32:19.057921289 +0000 @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestDoubleVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestDoubleVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestDoubleVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestDoubleVect + */ + +package compiler.c2.cr6340864; + +public class TestDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final double ADD_INIT = -7500.; + private static final double VALUE = 15.; + + public static void main(String args[]) { + System.out.println("Testing Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + double[] a0 = new double[ARRLEN]; + double[] a1 = new double[ARRLEN]; + double[] a2 = new double[ARRLEN]; + double[] a3 = new double[ARRLEN]; + // Initialize + double gold_sum = 0; + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a0 = new float[ARRLEN]; + float[] a1 = new float[ARRLEN]; + float[] a2 = new float[ARRLEN]; + float[] a3 = new float[ARRLEN]; + // Initialize + float gold_sum = 0; + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + 
System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a0 = new int[ARRLEN]; + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + int[] a3 = new int[ARRLEN]; + int[] a4 = new int[ARRLEN]; + long[] p2 = new long[ARRLEN/2]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>b); + } + } + static void test_srlc_add(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>VALUE); + } + } + static void test_srac_n(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>SHIFT); + } + } + static void test_srac_on(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>(-SHIFT)); + } + } + static void 
test_srav(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>b); + } + } + static void test_srac_add(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & b)>>VALUE); + } + } + + static void test_pack2(long[] p2, int[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l0 = (long)a1[i*2+0]; + long l1 = (long)a1[i*2+1]; + p2[i] = (l1 << 32) | (l0 & 0xFFFFFFFFl); + } + } + static void test_unpack2(int[] a0, long[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l = p2[i]; + a0[i*2+0] = (int)(l & 0xFFFFFFFFl); + a0[i*2+1] = (int)(l >> 32); + } + } + static void test_pack2_swap(long[] p2, int[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l0 = (long)a1[i*2+0]; + long l1 = (long)a1[i*2+1]; + p2[i] = (l0 << 32) | (l1 & 0xFFFFFFFFl); + } + } + static void test_unpack2_swap(int[] a0, long[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l = p2[i]; + a0[i*2+0] = (int)(l >> 32); + a0[i*2+1] = (int)(l & 0xFFFFFFFFl); + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/c2/cr6340864/TestLongVect.java 2021-01-25 19:32:20.385935247 +0000 @@ -0,0 +1,1133 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestLongVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestLongVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestLongVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestLongVect + */ + +package compiler.c2.cr6340864; + +public class TestLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final long ADD_INIT = Long.MAX_VALUE-500; + private static final long BIT_MASK = 0xEC80F731EC80F731L; + private static final int VALUE = 31; + private static final int SHIFT = 64; + + public static void main(String args[]) { + System.out.println("Testing Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + long[] a0 = new long[ARRLEN]; + long[] a1 = new long[ARRLEN]; + long[] a2 = new long[ARRLEN]; + long[] a3 = new long[ARRLEN]; + long[] a4 = new long[ARRLEN]; + // Initialize + long gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + } + + if (errn > 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(long[] a0, long[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>b); + } + } + static void test_srlc_add(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) 
{ + a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>VALUE); + } + } + static void test_srac_n(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>SHIFT); + } + } + static void test_srac_on(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(long[] a0, long[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>b); + } + } + static void test_srac_add(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & b)>>VALUE); + } + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/c2/cr6340864/TestShortVect.java 2021-01-25 19:32:20.806939672 +0000 @@ -0,0 +1,1344 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestShortVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestShortVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestShortVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestShortVect + */ + +package compiler.c2.cr6340864; + +public class TestShortVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = Short.MAX_VALUE-500; + private static final int BIT_MASK = 0xB731; + private static final int VALUE = 7; + private static final int SHIFT = 16; + + public static void main(String args[]) { + System.out.println("Testing Short vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a0 = new short[ARRLEN]; + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + short[] a3 = new short[ARRLEN]; + short[] a4 = new short[ARRLEN]; + int[] p2 = new int[ARRLEN/2]; + long[] p4 = new long[ARRLEN/4]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>b); 
+ } + } + static void test_srlc_add(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>VALUE); + } + } + static void test_srac_n(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>SHIFT); + } + } + static void test_srac_on(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>b); + } + } + static void test_srac_add(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & b)>>VALUE); + } + } + + static void test_pack2(int[] p2, short[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l1 << 16) | (l0 & 0xFFFF); + } + } + static void test_unpack2(short[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (short)(l & 0xFFFF); + a0[i*2+1] = (short)(l >> 16); + } + } + static void test_pack2_swap(int[] p2, short[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l0 << 16) | (l1 & 0xFFFF); + } + } + static void test_unpack2_swap(short[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (short)(l >> 16); + a0[i*2+1] = (short)(l & 0xFFFF); + } + } + + static void test_pack4(long[] p4, short[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l0 & 0xFFFFl) | + ((l1 & 0xFFFFl) << 16) | + ((l2 & 0xFFFFl) << 32) | + ((l3 & 0xFFFFl) << 48); + } + } + static void test_unpack4(short[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (short)(l & 0xFFFFl); + a0[i*4+1] = (short)(l >> 16); + a0[i*4+2] = (short)(l >> 32); + a0[i*4+3] = (short)(l >> 48); + } + } + static void test_pack4_swap(long[] p4, short[] a1) { + if (p4.length*4 > a1.length) return; + for 
(int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l3 & 0xFFFFl) | + ((l2 & 0xFFFFl) << 16) | + ((l1 & 0xFFFFl) << 32) | + ((l0 & 0xFFFFl) << 48); + } + } + static void test_unpack4_swap(short[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (short)(l >> 48); + a0[i*4+1] = (short)(l >> 32); + a0[i*4+2] = (short)(l >> 16); + a0[i*4+3] = (short)(l & 0xFFFFl); + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val)); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/codegen/8144028/BitTests.java 2021-01-25 19:32:21.238944212 +0000 @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2015, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8144028 + * @summary Use AArch64 bit-test instructions in C2 + * @modules java.base + * @run main/othervm -Xbatch -XX:CompileCommand=dontinline,BitTests::* -XX:-TieredCompilation BitTests + * @run main/othervm -Xbatch -XX:+TieredCompilation -XX:TieredStopAtLevel=1 BitTests + * @run main/othervm -Xbatch -XX:+TieredCompilation BitTests + * + */ + +// Try to ensure that the bit test instructions TBZ/TBNZ, TST/TSTW +// don't generate incorrect code. We can't guarantee that C2 will use +// bit test instructions for this test and it's not a bug if it +// doesn't. However, these test cases are ideal candidates for each +// of the instruction forms. 
+public class BitTests { + + private final XorShift r = new XorShift(); + + private final long increment(long ctr) { + return ctr + 1; + } + + private final int increment(int ctr) { + return ctr + 1; + } + + private final long testIntSignedBranch(long counter) { + if ((int)r.nextLong() < 0) { + counter = increment(counter); + } + return counter; + } + + private final long testLongSignedBranch(long counter) { + if (r.nextLong() < 0) { + counter = increment(counter); + } + return counter; + } + + private final long testIntBitBranch(long counter) { + if (((int)r.nextLong() & (1 << 27)) != 0) { + counter = increment(counter); + } + if (((int)r.nextLong() & (1 << 27)) != 0) { + counter = increment(counter); + } + return counter; + } + + private final long testLongBitBranch(long counter) { + if ((r.nextLong() & (1l << 50)) != 0) { + counter = increment(counter); + } + if ((r.nextLong() & (1l << 50)) != 0) { + counter = increment(counter); + } + return counter; + } + + private final long testLongMaskBranch(long counter) { + if (((r.nextLong() & 0x0800000000l) != 0)) { + counter++; + } + return counter; + } + + private final long testIntMaskBranch(long counter) { + if ((((int)r.nextLong() & 0x08) != 0)) { + counter++; + } + return counter; + } + + private final long testLongMaskBranch(long counter, long mask) { + if (((r.nextLong() & mask) != 0)) { + counter++; + } + return counter; + } + + private final long testIntMaskBranch(long counter, int mask) { + if ((((int)r.nextLong() & mask) != 0)) { + counter++; + } + return counter; + } + + private final long step(long counter) { + counter = testIntSignedBranch(counter); + counter = testLongSignedBranch(counter); + counter = testIntBitBranch(counter); + counter = testLongBitBranch(counter); + counter = testIntMaskBranch(counter); + counter = testLongMaskBranch(counter); + counter = testIntMaskBranch(counter, 0x8000); + counter = testLongMaskBranch(counter, 0x800000000l); + return counter; + } + + + private final long finalBits = 3; + + private long bits = 7; + + public static void main(String[] args) { + BitTests t = new BitTests(); + + long counter = 0; + for (int i = 0; i < 10000000; i++) { + counter = t.step((int) counter); + } + if (counter != 50001495) { + System.err.println("FAILED: counter = " + counter + ", should be 50001495."); + System.exit(97); + } + System.out.println("PASSED"); + } + +} + +// Marsaglia's xor-shift generator, used here because it is +// reproducible across all Java implementations. It is also very +// fast. +class XorShift { + + private long y; + + XorShift() { + y = 2463534242l; + } + + public long nextLong() { + y ^= (y << 13); + y ^= (y >>> 17); + return (y ^= (y << 5)); + + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/codegen/TestCharVect2.java 2021-01-25 19:32:21.665948700 +0000 @@ -0,0 +1,1336 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8001183 + * @summary incorrect results of char vectors right shift operaiton + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.codegen.TestCharVect2 + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.codegen.TestCharVect2 + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.codegen.TestCharVect2 + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.codegen.TestCharVect2 + */ + +package compiler.codegen; + +public class TestCharVect2 { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = Character.MAX_VALUE-500; + private static final int BIT_MASK = 0xB731; + private static final int VALUE = 7; + private static final int SHIFT = 16; + + public static void main(String args[]) { + System.out.println("Testing Char vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + char[] a0 = new char[ARRLEN]; + char[] a1 = new char[ARRLEN]; + short[] a2 = new short[ARRLEN]; + short[] a3 = new short[ARRLEN]; + short[] a4 = new short[ARRLEN]; + int[] p2 = new int[ARRLEN/2]; + long[] p4 = new long[ARRLEN/4]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(char[] a0, 
char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>b); + } + } + static void test_srlc_add(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>VALUE); + } + } + static void test_srac_n(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>SHIFT); + } + } + static void test_srac_on(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>b); + } + } + static void test_srac_add(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & b)>>VALUE); + } + } + + static void test_pack2(int[] p2, char[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l1 << 16) | (l0 & 0xFFFF); + } + } + static void test_unpack2(char[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (char)(l & 0xFFFF); + a0[i*2+1] = (char)(l >> 16); + } + } + static void test_pack2_swap(int[] p2, char[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l0 << 16) | (l1 & 0xFFFF); + } + } + static void test_unpack2_swap(char[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (char)(l >> 16); + a0[i*2+1] = (char)(l & 0xFFFF); + } + } + + static void test_pack4(long[] p4, char[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l0 & 0xFFFFl) | + ((l1 & 0xFFFFl) << 16) | + ((l2 & 0xFFFFl) << 32) | + ((l3 & 0xFFFFl) << 48); + } + } + static void test_unpack4(char[] a0, long[] p4) { + if (p4.length*4 > a0.length) 
return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (char)(l & 0xFFFFl); + a0[i*4+1] = (char)(l >> 16); + a0[i*4+2] = (char)(l >> 32); + a0[i*4+3] = (char)(l >> 48); + } + } + static void test_pack4_swap(long[] p4, char[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l3 & 0xFFFFl) | + ((l2 & 0xFFFFl) << 16) | + ((l1 & 0xFFFFl) << 32) | + ((l0 & 0xFFFFl) << 48); + } + } + static void test_unpack4_swap(char[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (char)(l >> 48); + a0[i*4+1] = (char)(l >> 32); + a0[i*4+2] = (char)(l >> 16); + a0[i*4+3] = (char)(l & 0xFFFFl); + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/floatingpoint/8165673/TestFloatJNIArgs.java 2021-01-25 19:32:22.126953546 +0000 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2015, 2016 SAP SE. All rights reserved. + * Copyright (c) 2018 Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +public class TestFloatJNIArgs { + static { + try { + System.loadLibrary("TestFloatJNIArgs"); + } catch (UnsatisfiedLinkError e) { + System.out.println("could not load native lib: " + e); + } + } + + public static native float add15floats( + float f1, float f2, float f3, float f4, + float f5, float f6, float f7, float f8, + float f9, float f10, float f11, float f12, + float f13, float f14, float f15); + + public static native float add10floats( + float f1, float f2, float f3, float f4, + float f5, float f6, float f7, float f8, + float f9, float f10); + + public static native float addFloatsInts( + float f1, float f2, float f3, float f4, + float f5, float f6, float f7, float f8, + float f9, float f10, float f11, float f12, + float f13, float f14, float f15, int a16, int a17); + + public static native double add15doubles( + double d1, double d2, double d3, double d4, + double d5, double d6, double d7, double d8, + double d9, double d10, double d11, double d12, + double d13, double d14, double d15); + + static void test() throws Exception { + float sum = TestFloatJNIArgs.add15floats(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f); + if (sum != 15.0f) { + throw new Error("Passed 15 times 1.0f to jni function which didn't add them properly: " + sum); + } + + float sum1 = TestFloatJNIArgs.add10floats(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f); + if (sum1 != 10.0f) { + throw new Error("Passed 10 times 1.0f to jni function which didn't add them properly: " + sum1); + } + + float sum2 = TestFloatJNIArgs.addFloatsInts(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1, 1); + if (sum2 != 17.0f) { + throw new Error("Passed 17 times 1 to jni function which didn't add them properly: " + sum2); + } + + double dsum = TestFloatJNIArgs.add15doubles(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); + if (dsum != 15.0) { + throw new Error("Passed 15 times 1.0 to jni function which didn't add them properly: " + dsum); + } + } + + public static void main(String[] args) throws Exception { + for (int i = 0; i < 200; ++i) { + test(); + } + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/floatingpoint/8165673/TestFloatJNIArgs.sh 2021-01-25 19:32:22.567958181 +0000 @@ -0,0 +1,105 @@ +#!/bin/sh + +# +# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2018 Red Hat, Inc. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. 
+#
+
+##
+## @test
+## @bug 8165673
+## @summary regression test for passing float args to a jni function.
+## @run shell/timeout=30 TestFloatJNIArgs.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set. Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../../test_env.sh
+
+# set platform-dependent variables
+if [ $VM_OS == "linux" -a $VM_CPU == "aarch64" ]; then
+  echo "Testing on linux-aarch64"
+  gcc_cmd=`which gcc`
+  if [ "x$gcc_cmd" == "x" ]; then
+    echo "WARNING: gcc not found. Cannot execute test." 2>&1
+    exit 0;
+  fi
+else
+  echo "Test passed; only valid for linux-aarch64"
+  exit 0;
+fi
+
+THIS_DIR=.
+
+cp ${TESTSRC}${FS}*.java ${THIS_DIR}
+${TESTJAVA}${FS}bin${FS}javac *.java
+
+$gcc_cmd -O1 -DLINUX -fPIC -shared \
+    -o ${THIS_DIR}${FS}libTestFloatJNIArgs.so \
+    -I${TESTJAVA}${FS}include \
+    -I${TESTJAVA}${FS}include${FS}linux \
+    ${TESTSRC}${FS}libTestFloatJNIArgs.c
+
+# run the java test in the interpreter first
+cmd="${TESTJAVA}${FS}bin${FS}java -Xint \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:+TieredCompilation -Xcomp \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:-TieredCompilation -Xcomp \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+echo "Test Passed"
+exit 0
--- /dev/null 2021-01-19 17:38:25.908523431 +0000
+++ new/test/compiler/floatingpoint/8165673/libTestFloatJNIArgs.c 2021-01-25 19:32:22.998962711 +0000
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015, 2016. All rights reserved.
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_add15floats
+  (JNIEnv *env, jclass cls,
+   jfloat f1, jfloat f2, jfloat f3, jfloat f4,
+   jfloat f5, jfloat f6, jfloat f7, jfloat f8,
+   jfloat f9, jfloat f10, jfloat f11, jfloat f12,
+   jfloat f13, jfloat f14, jfloat f15) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15;
+}
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_add10floats
+  (JNIEnv *env, jclass cls,
+   jfloat f1, jfloat f2, jfloat f3, jfloat f4,
+   jfloat f5, jfloat f6, jfloat f7, jfloat f8,
+   jfloat f9, jfloat f10) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10;
+}
+
+JNIEXPORT jfloat JNICALL Java_TestFloatJNIArgs_addFloatsInts
+  (JNIEnv *env, jclass cls,
+   jfloat f1, jfloat f2, jfloat f3, jfloat f4,
+   jfloat f5, jfloat f6, jfloat f7, jfloat f8,
+   jfloat f9, jfloat f10, jfloat f11, jfloat f12,
+   jfloat f13, jfloat f14, jfloat f15, jint a16, jint a17) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15 + a16 + a17;
+}
+
+JNIEXPORT jdouble JNICALL Java_TestFloatJNIArgs_add15doubles
+  (JNIEnv *env, jclass cls,
+   jdouble f1, jdouble f2, jdouble f3, jdouble f4,
+   jdouble f5, jdouble f6, jdouble f7, jdouble f8,
+   jdouble f9, jdouble f10, jdouble f11, jdouble f12,
+   jdouble f13, jdouble f14, jdouble f15) {
+  return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null 2021-01-19 17:38:25.908523431 +0000
+++ new/test/compiler/floatingpoint/8207838/TestFloatSyncJNIArgs.java 2021-01-25 19:32:23.419967136 +0000
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2015, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */ + +public class TestFloatSyncJNIArgs { + static { + try { + System.loadLibrary("TestFloatSyncJNIArgs"); + } catch (UnsatisfiedLinkError e) { + System.out.println("could not load native lib: " + e); + } + } + + private static final int numberOfThreads = 8; + + static volatile Error testFailed = null; + + public synchronized static native float combine15floats( + float f1, float f2, float f3, float f4, + float f5, float f6, float f7, float f8, + float f9, float f10, float f11, float f12, + float f13, float f14, float f15); + + public synchronized static native double combine15doubles( + double d1, double d2, double d3, double d4, + double d5, double d6, double d7, double d8, + double d9, double d10, double d11, double d12, + double d13, double d14, double d15); + + static void test() throws Exception { + Thread[] threads = new Thread[numberOfThreads]; + + for (int i = 0; i < numberOfThreads; i++) { + threads[i] = new Thread(() -> { + for (int j = 0; j < 10000; j++) { + float f = combine15floats(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9, 10, 11, 12, 13, 14, 15); + if (f != 81720.0f) { + testFailed = new Error("jni function didn't combine 15 float args properly: " + f); + throw testFailed; + } + } + }); + } + for (int i = 0; i < numberOfThreads; i++) { + threads[i].start(); + } + for (int i = 0; i < numberOfThreads; i++) { + threads[i].join(); + } + if (testFailed != null) { + throw testFailed; + } + + for (int i = 0; i < numberOfThreads; i++) { + threads[i] = new Thread(() -> { + for (int j = 0; j < 10000; j++) { + double d = combine15doubles(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9, 10, 11, 12, 13, 14, 15); + if (d != 81720.0) { + testFailed = new Error("jni function didn't combine 15 double args properly: " + d); + throw testFailed; + } + } + }); + } + for (int i = 0; i < numberOfThreads; i++) { + threads[i].start(); + } + for (int i = 0; i < numberOfThreads; i++) { + threads[i].join(); + } + if (testFailed != null) { + throw testFailed; + } + } + + public static void main(String[] args) throws Exception { + for (int i = 0; i < 200; ++i) { + test(); + } + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/floatingpoint/8207838/TestFloatSyncJNIArgs.sh 2021-01-25 19:32:23.855971718 +0000 @@ -0,0 +1,105 @@ +#!/bin/sh + +# +# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2018 Red Hat, Inc. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +## +## @test +## @bug 8207838 +## @summary regression test for passing float args to a synchronized jni function. 
+## @run shell/timeout=300 TestFloatSyncJNIArgs.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set. Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../../test_env.sh
+
+# set platform-dependent variables
+if [ $VM_OS == "linux" -a $VM_CPU == "aarch64" ]; then
+  echo "Testing on linux-aarch64"
+  gcc_cmd=`which gcc`
+  if [ "x$gcc_cmd" == "x" ]; then
+    echo "WARNING: gcc not found. Cannot execute test." 2>&1
+    exit 0;
+  fi
+else
+  echo "Test passed; only valid for linux-aarch64"
+  exit 0;
+fi
+
+THIS_DIR=.
+
+cp ${TESTSRC}${FS}*.java ${THIS_DIR}
+${TESTJAVA}${FS}bin${FS}javac *.java
+
+$gcc_cmd -O1 -DLINUX -fPIC -shared \
+    -o ${THIS_DIR}${FS}libTestFloatSyncJNIArgs.so \
+    -I${TESTJAVA}${FS}include \
+    -I${TESTJAVA}${FS}include${FS}linux \
+    ${TESTSRC}${FS}libTestFloatSyncJNIArgs.c
+
+# run the java test in the interpreter first
+cmd="${TESTJAVA}${FS}bin${FS}java -Xint \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:+TieredCompilation \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+cmd="${TESTJAVA}${FS}bin${FS}java -XX:-TieredCompilation \
+    -Djava.library.path=${THIS_DIR}${FS} TestFloatSyncJNIArgs"
+
+echo "$cmd"
+eval $cmd
+
+if [ $? -ne 0 ]
+then
+  echo "Test Failed"
+  exit 1
+fi
+
+echo "Test Passed"
+exit 0
--- /dev/null 2021-01-19 17:38:25.908523431 +0000
+++ new/test/compiler/floatingpoint/8207838/libTestFloatSyncJNIArgs.c 2021-01-25 19:32:24.272976101 +0000
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Fletcher checksum. This is a nonlinear function which detects both */
+/* missing or otherwise incorrect arguments and arguments in the wrong */
+/* order.
*/ +static jfloat fcombine(jfloat f[], int len) { + int i; + jfloat sum = 0, sum_of_sums = 0; + for (i = 0; i < len; i++) { + sum += f[i]; + sum_of_sums += sum; + } + return sum + sum_of_sums * sum; +} + +static jdouble combine(jdouble f[], int len) { + int i; + double sum = 0, sum_of_sums = 0; + for (i = 0; i < len; i++) { + sum += f[i]; + sum_of_sums += sum; + } + return sum + sum_of_sums * sum; +} + +JNIEXPORT jfloat JNICALL Java_TestFloatSyncJNIArgs_combine15floats + (JNIEnv *env, jclass cls, + jfloat f1, jfloat f2, jfloat f3, jfloat f4, + jfloat f5, jfloat f6, jfloat f7, jfloat f8, + jfloat f9, jfloat f10, jfloat f11, jfloat f12, + jfloat f13, jfloat f14, jfloat f15) { + + jfloat f[15]; + f[0] = f1; f[1] = f2; f[2] = f3; f[3] = f4; f[4] = f5; + f[5] = f6; f[6] = f7; f[7] = f8; f[8] = f9; f[9] = f10; + f[10] = f11; f[11] = f12; f[12] = f13; f[13] = f14; f[14] = f15; + + return fcombine(f, sizeof f / sizeof f[0]); +} + +JNIEXPORT jdouble JNICALL Java_TestFloatSyncJNIArgs_combine15doubles + (JNIEnv *env, jclass cls, + jdouble f1, jdouble f2, jdouble f3, jdouble f4, + jdouble f5, jdouble f6, jdouble f7, jdouble f8, + jdouble f9, jdouble f10, jdouble f11, jdouble f12, + jdouble f13, jdouble f14, jdouble f15) { + + jdouble f[15]; + f[0] = f1; f[1] = f2; f[2] = f3; f[3] = f4; f[4] = f5; + f[5] = f6; f[6] = f7; f[7] = f8; f[8] = f9; f[9] = f10; + f[10] = f11; f[11] = f12; f[12] = f13; f[13] = f14; f[14] = f15; + + return combine(f, sizeof f / sizeof f[0]); +} + + +#ifdef __cplusplus +} +#endif --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForSupportedAArch64CPU.java 2021-01-25 19:32:24.714980747 +0000 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import com.oracle.java.testlibrary.ExitCode; +import com.oracle.java.testlibrary.Platform; +import com.oracle.java.testlibrary.cli.CommandLineOptionTest; +import com.oracle.java.testlibrary.cli.predicate.AndPredicate; + +/** + * Generic test case for SHA-related options targeted to AArch64 CPUs which + * support instructions required by the tested option. 
+ */ +public class GenericTestCaseForSupportedAArch64CPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForSupportedAArch64CPU(String optionName) { + super(optionName, new AndPredicate(Platform::isAArch64, + SHAOptionsBase.getPredicateForOption(optionName))); + } + + @Override + protected void verifyWarnings() throws Throwable { + // Verify that there are no warning when option is explicitly enabled. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option could be disabled even if +UseSHA was passed to + // JVM. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true), + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + // Verify that it is possible to enable the tested option and disable + // all SHA intrinsics via -UseSHA without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + + @Override + protected void verifyOptionValues() throws Throwable { + // Verify that on supported CPU option is enabled by default. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "true"); + + // Verify that it is possible to explicitly enable the option. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "true", + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that it is possible to explicitly disable the option. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + // verify that option is disabled when -UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, true), + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, false)); + + // Verify that it is possible to explicitly disable the tested option + // even if +UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true), + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedAArch64CPU.java 2021-01-25 19:32:25.144985266 +0000 @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import com.oracle.java.testlibrary.ExitCode; +import com.oracle.java.testlibrary.Platform; +import com.oracle.java.testlibrary.cli.CommandLineOptionTest; +import com.oracle.java.testlibrary.cli.predicate.AndPredicate; +import com.oracle.java.testlibrary.cli.predicate.NotPredicate; + +/** + * Generic test case for SHA-related options targeted to AArch64 CPUs which don't + * support instruction required by the tested option. + */ +public class GenericTestCaseForUnsupportedAArch64CPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForUnsupportedAArch64CPU(String optionName) { + super(optionName, new AndPredicate(Platform::isAArch64, + new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); + } + + @Override + protected void verifyWarnings() throws Throwable { + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { + SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, ExitCode.OK, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + } + + @Override + protected void verifyOptionValues() throws Throwable { + // Verify that option is disabled by default. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false"); + + // Verify that option is disabled even if it was explicitly enabled + // using CLI options. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)); + } +} --- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/vectorization/TestVectorUnalignedOffset.java 2021-01-25 19:32:25.641990490 +0000 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+/**
+ * @test
+ * @bug 8155612
+ * @summary Aarch64: vector nodes need to support misaligned offset
+ * @run main/othervm -XX:-BackgroundCompilation TestVectorUnalignedOffset
+ *
+ */
+
+
+public class TestVectorUnalignedOffset {
+
+    static void test1(int[] src_array, int[] dst_array, int l) {
+        for (int i = 0; i < l; i++) {
+            dst_array[i + 250] = src_array[i + 250];
+        }
+    }
+
+    static void test2(byte[] src_array, byte[] dst_array, int l) {
+        for (int i = 0; i < l; i++) {
+            dst_array[i + 250] = src_array[i + 250];
+        }
+    }
+
+    static public void main(String[] args) {
+        int[] int_array = new int[1000];
+        byte[] byte_array = new byte[1000];
+        for (int i = 0; i < 20000; i++) {
+            test1(int_array, int_array, int_array.length - 250);
+            test2(byte_array, byte_array, byte_array.length - 250);
+        }
+    }
+}
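
One possible way to confirm on an AArch64 build that C2 actually vectorizes test1/test2 above is to print the generated code for one of them. This is an illustrative sketch, not part of the patch: it assumes the test class has been compiled into the current directory and that an hsdis disassembler plugin is installed next to libjvm.so.

    java -XX:-BackgroundCompilation -XX:+UnlockDiagnosticVMOptions \
        -XX:CompileCommand=print,TestVectorUnalignedOffset.test1 \
        TestVectorUnalignedOffset

With the misaligned-offset fix in place, the printed loop body is expected to use NEON (Q-register) loads and stores even though the constant element offset of 250 is not a multiple of the vector length.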